{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Conll Exploratroy Data Analasis\n",
    "\n",
    "## tag statistic\n",
    "- cost sensitive method\n",
    "\n",
    "## sentence length distribution\n",
    "- bert input pre-process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getData(path):\n",
    "    with open(path, 'r', encoding='utf8') as file:\n",
    "        data_text = file.readlines()\n",
    "    \n",
    "    X, Y, x, y = list(), list(), list(), list()\n",
    "    for sample in data_text:\n",
    "        if sample != '\\n':\n",
    "            item = sample.split(' ')\n",
    "            x.append(item[0])\n",
    "            y.append(item[-1].replace('\\n', ''))\n",
    "        else:\n",
    "            X.append(x)\n",
    "            Y.append(y)\n",
    "            x, y = list(), list()\n",
    "            \n",
    "    return X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_x, train_y = getData('data/eng.train')\n",
    "test_x, test_y = getData('data/eng.testa')\n",
    "val_x, val_y = getData('data/eng.testb')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## tag statistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "def label_distribution(label: list):\n",
    "    d = pd.DataFrame([tag for tags in label for tag in tags])\n",
    "    count = d[0].value_counts()\n",
    "    sns.barplot(x=count.values, y=count.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8 6 8 8\n"
     ]
    }
   ],
   "source": [
    "train_set = set([tag for tags in train_y for tag in tags])\n",
    "test_set = set([tag for tags in test_y for tag in tags])\n",
    "val_set = set([tag for tags in val_y for tag in tags])\n",
    "\n",
    "with open('data/label.txt', 'a', encoding='utf8') as file:\n",
    "    for tag in train_set:\n",
    "        file.write(str(tag)+'\\n')\n",
    "\n",
    "print(len(train_set), len(test_set), len(val_set), len(train_set|test_set|val_set))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD4CAYAAAAHHSreAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVoUlEQVR4nO3de7CkdX3n8fcnwAhkhkucUWe5ZGDRYUXjAY5REcWlgqBBIZWklFoCbHYzwQu14iKIY5FZEwIiBFdj0DEXYSMBkkVTpGpLXSteirLAMzDAMALDTQRZAaMzchGBfPePfiZz5jx9zpxLn+6emferqut0/37P0/05PWf6c55L90lVIUnSeL806ACSpOFjOUiSWiwHSVKL5SBJarEcJEktuw46QC8sXry4li1bNugYkrRdWbNmzRNVtaTb3A5RDsuWLWNsbGzQMSRpu5Lk+5PNuVtJktSyQ2w5fO/hH3Pkh64adAxJ6qs1nzht3u7bLQdJUovlIElqsRwkSS2WgySpxXKQJLVYDpKkFstBktQydOWQZP8k/5hkQ5L7kvzPJAsGnUuSdiZDVQ5JAlwPfLmqXg68AlgIXDjQYJK0kxmqcgCOBX5eVX8DUFUvAGcDv59kz4Emk6SdyLCVw2HAmvEDVbUJeAg4ZPx4khVJxpKMPf/0z/oYUZJ2fMNWDgFqOuNVtbqqRqtqdNc9F/UlnCTtLIatHO4ERscPJNkLOAC4byCJJGknNGzl8HVgzySnASTZBbgM+EJVPT3QZJK0ExmqcqiqAn4L+N0kG4B7gJ8DHxloMEnayQzd33Ooqh8A7xh0DknamQ3VloMkaThYDpKkFstBktRiOUiSWiwHSVLL0J2tNBv/Yf8XM/aJ0wYdQ5J2GG45SJJaLAdJUovlIElqsRwkSS07xAHpXzx6Jw997NU9vc8DL7ijp/cnSdsTtxwkSS2WgySpxXKQJLVYDpKkFstBktRiOUiSWiwHSVJLz8ohyZOTjK9K8kiStUnWJXlnl/HNl32SvCXJxiS3JrkryaW9yihJmp5+bTlcXlUjwO8Cf53kl8aPj7v8tBn/dlUdDhwOnJjkjX3KKUmiz7uVqup7wPPA4mku/wywFthvHmNJkiboazkkeR3wr8DjzdDZ43Yp/XOX5fcFXg58q8vciiRjScb+5akX5jW3JO1s+vXZSmcnORX4GfCuqqok0Nmt1O2YwpuS3A4sBy6uqv83cYGqWg2sBvi1/fao+YsuSTufnm85JLlw89bAuOHNxxbeVFXfnsbdfLuqfg14NfCeJCO9zilJmlzPy6GqVm4+wNyD+7oHuAg4b87BJEnTNuj3OZw94VTWZV2W+Szw5iQH9TmbJO20enbMoaoWTjK+aorxbnMPAt8Yt9wzeLaSJPXVoLccJElDyHKQJLVYDpKkFstBktRiOUiSWvr1Dul5tWDpYRx4wdigY0jSDsMtB0lSi+UgSWqxHCRJLZaDJKllhzggfddjd/HGT/fmj8XdeNaNPbkfSdqeueUgSWqxHCRJLZaDJKnFcpAktVgOkqQWy0GS1GI5SJJaZl0OSZ6cYu7oJDcnuau5rBg3tyrJI83fjF6f5JQJ636wWeeOJLcl+bMku802pyRp5nq+5ZDkZcDVwJlVdShwNPCHSX5z3GKXV9UIcBLwuc0v/knOBN4KvL6qXg28FngM2KPXOSVJk5uP3UrvA75QVbcAVNUTwLnAhycuWFUbgKeBfZuhlcB7quqnzfwvquriqto0DzklSZOYj3I4DFgzYWysGd9KkiOADVX1WJJFwMKqemA6D5JkRZKxJGPPPfncnENLkraYj3IIUF3Gx4+dneRu4CZgVbf1khzfHJd4MMlRrTurWl1Vo1U1uttCD0lIUi/NuRySXNi8iK9thu4ERicsdiSwftzty6tqOfAu4Kokuze7jp5KchBAVX2lOS6xDlgw15ySpOmbczlU1cqqGmleyAE+A5yRZAQgyYuBjwOX
dFn3ejq7nE5vhi4CrkiyT7NugN3nmlGSNDM9/8juqno0yanA55vjCAE+WVU3TLLKx4Crk3weuALYE7gpybPAk8CNwK29zilJmtysy6GqFk4x9y06p6F2m1s14fYaYPm4oUubiyRpQHyHtCSpxXKQJLVYDpKkFstBktRiOUiSWnp+KusgHPqSQ7nxrBsHHUOSdhhuOUiSWiwHSVKL5SBJarEcJEktloMkqWWHOFvpZ3ffzTfffMys1z/mW9/sYRpJ2v655SBJarEcJEktloMkqcVykCS1WA6SpBbLQZLU0pNySPLkJOOrkpzTZfzkJLcnuSvJHUlOnjB/TjO3LsltSU7rRU5J0vT0/X0OSV5D529EH1dVDyQ5CPhakvur6vYkZwLHAb9eVZuS7A2c3O+ckrQzG8RupXOAP62qBwCarxcBH2rmPwK8t6o2NfMbq+rKAeSUpJ3WIMrhMGDNhLEx4LAki4BFVXXftu4kyYokY0nGNj733HzklKSd1iDKIUBNMtZtrquqWl1Vo1U1uvduu/U4oiTt3HpaDkkuTLI2ydopFrsTGJ0wdgSwvtmV9FSSg3uZS5I0Mz0th6paWVUjVTUyxWKXAucnWQbQfP0IcFkzfxHwmSR7NfN7JVnRy5ySpKn142yljyb5wOYbVbV/kvOAG5LsBjwHnFtVa5tFrgAWAt9N8lwzfxmSpL5J1bR28Q+15YsW1erDj5j1+n5kt6SdUZI1VTVxNz/gO6QlSV1YDpKkFstBktRiOUiSWiwHSVJL3z94bz4sWr7cM44kqYfccpAktVgOkqQWy0GS1GI5SJJaLAdJUssOcbbSYw9v5M//+w1TLvP+y97RpzSStP1zy0GS1GI5SJJaLAdJUovlIElqsRwkSS2WgySpxXKQJLXMuBySPDnJ+KokleSQcWNnN2Ojze0Hkyxurq9McmeS25OsTfK6Zny3JBcn2ZBkXZKbk7xtdt+eJGk2ev0muDuAdwN/0tz+HWD9xIWSvAE4ETiiqp5tCmNBM/3HwFLgVc3cS4FjepxTkjSFXu9W+jJwEkCSg4GNwONdllsKPFFVzwJU1RNV9cMkewJ/AJw1bu5HVXVdj3NKkqbQ63LYBPwgyauAU4BrJ1nuq8ABSe5J8hdJNm8ZHAI8VFWbtvVASVYkGUsy9uTTG3sSXpLUMR8HpK+hs2vpZOBL3RaoqieBI4EVdLYsrk1yxkwepKpWV9VoVY0u3HPvOQWWJG1t1uWQ5MLmQPLaCVM3AL/HNrYAquqFqvpGVf0R8H7gt4F7gQOTLJptLknS3M26HKpqZVWNVNXIhPFngPOACydbN8nyJC8fNzQCfL+qngb+CvhUkgXNskuTnDrbnJKkmZuXj+yuqmu2schC4NNJ9gGep7PFsKKZ+yids53WJ/k58BRwwXzklCR1N+NyqKqFk4yvmmT8LeOuL2uuPgEcNcnyvwDObS6SpAHwHdKSpBbLQZLUYjlIklosB0lSi+UgSWqZl1NZ++0l++/N+y97x6BjSNIOwy0HSVKL5SBJarEcJEktloMkqcVykCS1WA6SpBbLQZLUYjlIklosB0lSi+UgSWqxHCRJLZaDJKllm+WQ5IUka5PcluSWJF3/vGeSVUkqySHjxs5uxkab2w8mWdxcX5nkziS3N/f/umZ8tyQXJ9mQZF2Sm5O8rTffriRpOqbzqazPVNUIQJLjgYuAYyZZ9g7g3cCfNLd/B1g/caEkbwBOBI6oqmebwljQTP8xsBR4VTP30ikeT5I0D2a6W2kv4CdTzH8ZOAkgycHARuDxLsstBZ6oqmcBquqJqvphkj2BPwDOGjf3o6q6boY5JUlzMJ1y2KPZ7XMX8Jd0frOfzCbgB0leBZwCXDvJcl8FDkhyT5K/SLJ5y+AQ4KGq2rStUElWJBlLMvb44936R5I0W9Mph2eqaqSqDgVOAK5KkimWv4bOrqWTgS91W6CqngSOBFbQ2bK4NskZM8hNVa2uqtGqGl2yZMlMVpUkbcOM
ditV1XeAxcCSJBc2WxRrJyx2A/B7bGMLoKpeqKpvVNUfAe8Hfhu4FzgwyaKZ5JIk9daMyiHJocAuwI+ramWzRTEyfpmqegY4D7hwivtZnuTl44ZGgO9X1dPAXwGfSrKgWXZpklNnklOSNDfTOVtpj3FbBwFOr6oXplqhqq7Zxn0uBD6dZB/geTpbDCuauY/SOdtpfZKfA08BF0wjpySpR1JVg84wZ6OjozU2NjboGJK0XUmypqpGu835DmlJUovlIElqsRwkSS2WgySpxXKQJLVYDpKkFstBktRiOUiSWiwHSVKL5SBJarEcJEktloMkqcVykCS1WA6SpBbLQZLUYjlIklosB0lSi+UgSWqZdjkkeSHJ2iS3JbklyVFTLHt0kpuT3NVcVoybW5Xkkea+1ic5ZcK6H2zWuaN5rD9Lstvsvj1J0mzMZMvhmaoaqarXAOcDF3VbKMnLgKuBM6vqUOBo4A+T/Oa4xS6vqhHgJOBzm1/8k5wJvBV4fVW9Gngt8Biwx8y+LUnSXMx2t9JewE8mmXsf8IWqugWgqp4AzgU+PHHBqtoAPA3s2wytBN5TVT9t5n9RVRdX1aZZ5pQkzcKuM1h2jyRrgd2BpcCxkyx3GHDlhLGxZnwrSY4ANlTVY0kWAQur6oHphGl2Va0AOPDAA6f1DUiSpmc2u5UOBU4ArkqSLssFqC7j48fOTnI3cBOwqtt6SY5vjks82O34RlWtrqrRqhpdsmTJDL4NSdK2zGq3UlV9B1gMLElyYfMivraZvhMYnbDKkcD6cbcvr6rlwLvolMzuza6jp5Ic1DzGV5rjEuuABbPJKUmanVmVQ5JDgV2AH1fVymaLYqSZ/gxwRpKRZtkXAx8HLpl4P1V1PZ1dTqc3QxcBVyTZp1k3dHZjSZL6aDbHHKCzC+j0qnph4kJV9WiSU4HPN8cRAnyyqm6Y5H4/Blyd5PPAFcCewE1JngWeBG4Ebp1BTknSHKWq2+GB7cvo6GiNjY0NOoYkbVeSrKmqiYcBAN8hLUnqwnKQJLVYDpKkFstBktRiOUiSWiwHSVKL5SBJarEcJEktloMkqcVykCS1WA6SpBbLQZLUYjlIklosB0lSi+UgSWqxHCRJLZaDJKnFcpAktcyqHJK8kGRtktuS3JLkqEmWW5XknC7jJye5PcldSe5IcvKE+XOauXXNY5w2m5ySpNnZdZbrPVNVIwBJjgcuAo6ZzopJXgNcChxXVQ8kOQj4WpL7q+r2JGcCxwG/XlWbkuwNnDzLnJKkWejFbqW9gJ/MYPlzgD+tqgcAmq8XAR9q5j8CvLeqNjXzG6vqyh7klCRN02y3HPZIshbYHVgKHDuDdQ+js+Uw3hjwviSLgEVVdd+27iTJCmAFwIEHHjiDh5ckbctstxyeqaqRqjoUOAG4KkmmuW6AmmSs21xXVbW6qkaranTJkiXTzS1JmoY571aqqu8Ai4ElSS5sDlSvnWKVO4HRCWNHAOubXUlPJTl4rrkkSbM353JIciiwC/DjqlrZbFGMTLHKpcD5SZY16y+jc5zhsmb+IuAzSfZq5vdqdiFJkvpkrsccoLMr6PSqemGSZT+a5AObb1TV/knOA25IshvwHHBuVW2+vyuAhcB3kzzXzF+GJKlvUjWtXfxDbXR0tMbGxgYdQ5K2K0nWVNXE3fyA75CWJHVhOUiSWiwHSVKL5SBJarEcJEktloMkqcVykCS1WA6SpJYd4k1wSX4G3D3oHF0sBp4YdIgJhjETDGcuM03fMOYaxkwwXLl+taq6fnLpbD8+Y9jcPdm7/AYpydiw5RrGTDCcucw0fcOYaxgzwfDmmsjdSpKkFstBktSyo5TD6kEHmMQw5hrGTDCcucw0fcOYaxgzwfDm2soOcUBaktRbO8qWgySphywHSVJbVW3XF+AEOu9xuBf48Dzc/wHAPwPfo/P3r/9bM/4rwNeADc3Xfcetc36T527g+HHjRwJ3NHOfYstuvRcB1zbjNwHLppltF+BW4J+G
IROwD/APwF3N8/WGQWdq1ju7+bdbB/wdsHu/cwF/DTwGrBs31pcMwOnNY2yg81cbt5XrE82/4e3Al4B9+pmrW6Zxc+cABSwehueqGT+reew7gUv6nWu+LvP+APMavvPieB9wMLAAuA14ZY8fYylwRHN9EXAP8ErgEpoyAj4MfLy5/somx4uAg5p8uzRzN9N5wQzwf4C3NePvBT7bXH83cO00s30QuJot5TDQTMCVwH9tri+gUxaDzrQf8ACwR3P7OuCMfucC3gwcwdYvwvOegU4B3d983be5vu82cr0V2LW5/vF+5+qWqRk/APgK8H2achiC5+o/Av8XeFFz+yX9zjVfl4G/wM8pfOcJ/sq42+cD58/zY/4jcByd3waWNmNL6bwRr5Wh+WF+Q7PMXePGTwE+N36Z5vqudN49mW3k2B/4OnAsW8phYJmAvei8CGfC+KCfp/2AHzT/sXYF/onOi1/fcwHL2PqFZd4zjF+mmfsccMpUuSbM/RbwxX7n6paJzlbpa4AH2VIOA32u6Pyy8Rtdnre+5pqPy/Z+zGHzf/zNHm7G5kWSZcDhdDb5XlpVjwI0X1+yjUz7Nde7Zf23darqeWAj8OJtxPkkcC7wr+PGBpnpYOBx4G+S3JrkL5P88oAzUVWPAJcCDwGPAhur6quDztXoR4a5/h/5fTq/3Q40V5J3Ao9U1W0Tpgb9XL0CeFOSm5J8M8lrhyTXnG3v5ZAuYzUvD5QsBP438IGq2jSLTFNlndH3keRE4LGqWjNFjr5movObzhHAFVV1OPAUnV0lg8xEkn2Bk+hs2v874JeTnDroXNvQywyzzpZkJfA88MVB5kqyJ7ASuKDb9CAyjbMrnV09rwc+BFyXJEOQa86293J4mM5+yM32B37Y6wdJshudYvhiVV3fDP8oydJmfimdA1VTZXq4ud4t67+tk2RXYG/gX6aI9EbgnUkeBK4Bjk3ytwPO9DDwcFXd1Nz+BzplMchMAL8BPFBVj1fVc8D1wFFDkIs+ZZjV/5EkpwMnAv+pmn0ZA8z17+mU+23Nz/z+wC1JXjbATJs9DFxfHTfT2ZJfPAS55m6+91vN54VOa99P5wdn8wHpw3r8GAGuAj45YfwTbH0w8ZLm+mFsfSDqfrYciPound8wNh+Iensz/j62PhB13QzyvYUtxxwGmgn4NrC8ub6qyTPoTK+jcxbJns39XUnn7JK+56K9v3reM9A51vIAnd9u922u/8o2cp0ArAeWTFiub7kmZpqQ40G2HHMY9HN1JvCx5vor6Oz+Sb9zzcdl4C/wc/4G4O10ziC6D1g5D/d/NJ1NuNuBtc3l7XT2BX6dzqllX5/wg72yyXM3zZkIzfgondMp7wP+nC2nsO0O/D2dU9huBg6eQb63sKUcBpoJGAHGmufqy80P8sCfJ+B/0Dk1cx3wv5r/sH3NRecU2keB5+j8Jvhf+pWBznGDe5vLf55GrnvpvMitbS6f7WeubpkmZH6QrU9lHeRztQD42+ZxbgGO7Xeu+br48RmSpJbt/ZiDJGkeWA6SpBbLQZLUYjlIklosB0lSi+UgSWqxHCRJLf8fRdw2EXypOvkAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "label_distribution(train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAD4CAYAAAApWAtMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVF0lEQVR4nO3df5BlZX3n8fcn/AiwM4BxRjMlzA4sLGxAbYeOP4la1CJoNJKKKaWKgLVbmaBirbjID8cisyYENCCuxhCHNVGyy0K2Vs2Sqi21TIwWZYE9MAwDzvAjoKKsA0YZfokwfvePe8Zc2+6e7n5u9709vF9Vt+69z/Occ7/9QPdnnnPOvTdVhSRJLX5p2AVIkpY+w0SS1MwwkSQ1M0wkSc0ME0lSs32HXcAgrFixotasWTPsMiRpSdm0adPDVbVyEPvaK8JkzZo1TExMDLsMSVpSknxrUPvyMJckqdlesTL55gM/4IT3XTPsMiRpUW360zOHXcLPuDKRJDUzTCRJzQwTSVIzw0SS1MwwkSQ1M0wkSc0ME0lSs5ELkySHJfnbJHcnuTfJf02y/7DrkiRNb6TCJEmAzwKfr6qjgX8LLAMuGWphkqQZjVSYACcBP66qvwKoql3AucB/SHLQUCuTJE1r1MLkOGBTf0NV7QS+DRzV355kXZKJJBPPPPHoIpYoSZps1MIkQM2mvao2VtV4VY3ve9DyRSlOkjS1UQuTO4Dx/oYkBwOHA/cOpSJJ0h6NWph8GTgoyZkASfYBrgA+XVVPDLUySdK0RipMqqqA3wZ+N8ndwF3Aj4H3D7UwSdKMRu77TKrqO8Cbhl2HJGn2RmplIklamgwTSVIzw0SS1MwwkSQ1M0wkSc1G7mqu+fh3hz2XiT89c9hlSNKzlisTSVIzw0SS1MwwkSQ1M0wkSc32ihPwP3nwDr79wRcuyL5XX3z7guxXkvYmrkwkSc0ME0lSM8NEktTMMJEkNTNMJEnNDBNJUjPDRJLUbGBhkuSxado3JPluks1Jtib5rSnad98OTfLaJI8kuTXJtiSXD6pGSdLCWKyVyZVVNQb8LvCXSX6pv73v9qOu/WtV9RLgJcAbk7xqkeqUJM3Doh7mqqpvAs8AK2Y5/klgM/CCBSxLktRoUcMkycuAnwIPdU3n9h3i+ocpxj8HOBr46hR965JMJJn458d3LWjdkqSZLdZnc52b5AzgUeCtVVVJoHeYa6pzIr+RZAtwDHBZVf2/yQOqaiOwEeBFLziwFq50SdKeDHxlkuSS3auNvubd50Z+o6q+NovdfK2qXgS8EHhHkrFB1ylJGpyBh0lVrd99Qn0A+7oLuBS4oLkwSdKCGfb7TM6ddGnwminG/AXw6iRHLHJtkqRZGtg5k6paNk37hhnap+q7H/hK37gn8WouSRppw16ZSJL2AoaJJKmZYSJJamaYSJKaGSaSpGaL9Q74BbX/quNYffHEsMuQpGctVyaSpGaGiSSpmWEiSWpmmEiSmu0VJ+C37djGqz4+2C9jvPHdNw50f5K0N3NlIklqZphIkpoZJpKkZoaJJKmZYSJJamaYSJKaGSaSpGbzDpMkj83Qd2KSm5Ns627r+vo2JPlu953vdyY5fdK27+22uT3JbUk+kmS/+dYpSVp4A1+ZJPlV4Frg7Ko6FjgR+IMkv9k37MqqGgPeDHxyd1gkORt4HfDyqnoh8OvADuDAQdcpSRqchTjM9S7g01V1C0BVPQycD1w4eWBV3Q08ATyna1oPvKOqftT1/6SqLquqnQtQpyRpQBYiTI4DNk1qm+jaf06StcDdVbUjyXJgWVXdN5sXSbIuyUSSiacfe7q5aEnS/C1EmASoKdr7285Nsh24Cdgw1XZJTunOq9yf5JW/sLOqjVU1XlXj+y3zlIokDVNzmCS5pPujv7lrugMYnzTsBODOvudXVtUxwFuBa5Ic0B3KejzJEQBV9YXuvMpWYP/WOiVJC6c5TKpqfVWNdX/4
AT4BvD3JGECS5wIfAj48xbafpXcI7Kyu6VLgqiSHdtsGOKC1RknSwhr4R9BX1YNJzgCu7s6DBPhoVd0wzSYfBK5NcjVwFXAQcFOSp4DHgBuBWwddpyRpcOYdJlW1bIa+r9K7rHeqvg2Tnm8Cjulrury7SZKWCN8BL0lqZphIkpoZJpKkZoaJJKmZYSJJajbwS4OH4djnHcuN775x2GVI0rOWKxNJUjPDRJLUzDCRJDUzTCRJzQwTSVKzveJqrke3b+cfX/2aeW//mq/+4wCrkaRnH1cmkqRmhokkqZlhIklqZphIkpoZJpKkZoaJJKnZQMIkyWPTtG9Ict4U7acl2ZJkW5Lbk5w2qf+8rm9rktuSnDmIOiVJC2PR32eS5MX0vuP95Kq6L8kRwJeS/FNVbUlyNnAy8NKq2pnkEOC0xa5TkjR7wzjMdR7wJ1V1H0B3fynwvq7//cA7q2pn1/9IVX1mCHVKkmZpGGFyHLBpUtsEcFyS5cDyqrp3TztJsi7JRJKJR55+eiHqlCTN0jDCJEBN0zZV35SqamNVjVfV+CH77TfgEiVJczHQMElySZLNSTbPMOwOYHxS21rgzu7Q1uNJjhxkXZKkhTXQMKmq9VU1VlVjMwy7HLgoyRqA7v79wBVd/6XAJ5Ic3PUfnGTdIOuUJA3WYlzN9YEk79n9pKoOS3IBcEOS/YCngfOranM35CpgGfCNJE93/VcgSRpZqZrVKYqRdszy5bXxJWvnvb0fQS/p2SjJpqqafNphXnwHvCSpmWEiSWpmmEiSmhkmkqRmhokkqdmif9DjQlh+zDFekSVJQ+TKRJLUzDCRJDUzTCRJzQwTSVIzw0SS1GyvuJprxwOP8Gf/+YZp+8+54k2LWI0kPfu4MpEkNTNMJEnNDBNJUjPDRJLUzDCRJDUzTCRJzQwTSVKzOYdJksemad+QpJIc1dd2btc23j2/P8mK7vH6JHck2ZJkc5KXde37Jbksyd1Jtia5Ocnr5/fjSZIWw6DftHg78Dbgj7vnbwHunDwoySuANwJrq+qpLmD277r/CFgFHN/1PR94zYDrlCQN0KAPc30eeDNAkiOBR4CHphi3Cni4qp4CqKqHq+p7SQ4Cfh94d1/f96vqbwZcpyRpgAYdJjuB7yQ5HjgduH6acV8EDk9yV5I/T7J75XEU8O2q2rmnF0qyLslEkonHnnhkIMVLkuZnIU7AX0fvUNdpwOemGlBVjwEnAOvorVyuT/L2ubxIVW2sqvGqGl920CFNBUuS2sw7TJJc0p043zyp6wbg99jDCqOqdlXVV6rqD4FzgN8B7gFWJ1k+37okSYtv3mFSVeuraqyqxia1PwlcAFwy3bZJjklydF/TGPCtqnoC+BTwsST7d2NXJTljvnVKkhbegnwEfVVdt4chy4CPJzkUeIbeimRd1/cBeleD3Znkx8DjwMULUackaTDmHCZVtWya9g3TtL+27/Ga7uHDwCunGf8T4PzuJklaAnwHvCSpmWEiSWpmmEiSmhkmkqRmhokkqdmCXBq82J532CGcc8Wbhl2GJD1ruTKRJDUzTCRJzQwTSVIzw0SS1MwwkSQ1M0wkSc0ME0lSM8NEktTMMJEkNTNMJEnNDBNJUjPDRJLUbI9hkmRXks1JbktyS5Ipv243yYYkleSovrZzu7bx7vn9SVZ0j9cnuSPJlm7/L+va90tyWZK7k2xNcnOS1w/mx5UkLYTZfGrwk1U1BpDkFOBS4DXTjL0deBvwx93ztwB3Th6U5BXAG4G1VfVUFzD7d91/BKwCju/6nj/D60mSRsBcD3MdDPxwhv7PA28GSHIk8Ajw0BTjVgEPV9VTAFX1cFV9L8lBwO8D7+7r+35V/c0c65QkLaLZhMmB3WGobcB/o7dymM5O4DtJjgdOB66fZtwXgcOT3JXkz5PsXnkcBXy7qnbuqagk65JMJJl46KGp8kqStFhmEyZPVtVYVR0LnApckyQzjL+O3qGu04DPTTWgqh4DTgDW0Vu5
XJ/k7XOom6raWFXjVTW+cuXKuWwqSRqwOR3mqqqvAyuAlUku6VYsmycNuwH4PfawwqiqXVX1lar6Q+Ac4HeAe4DVSZbPpS5J0nDNKUySHAvsA/ygqtZ3K5ax/jFV9SRwAXDJDPs5JsnRfU1jwLeq6gngU8DHkuzfjV2V5Iy51ClJWlyzuZrrwL7VR4CzqmrXTBtU1XV72Ocy4ONJDgWeobciWdf1fYDe1WB3Jvkx8Dhw8SzqlCQNSapq2DU0Gx8fr4mJiWGXIUlLSpJNVTU+iH35DnhJUjPDRJLUzDCRJDUzTCRJzQwTSVIzw0SS1MwwkSQ1M0wkSc0ME0lSM8NEktTMMJEkNTNMJEnNDBNJUjPDRJLUzDCRJDUzTCRJzQwTSVIzw0SS1GxeYZJkV5LNSW5LckuSV04zbkOS86ZoPy3JliTbktye5LRJ/ed1fVu71zhzPnVKkhbHvvPc7smqGgNIcgpwKfCa2WyY5MXA5cDJVXVfkiOALyX5p6rakuRs4GTgpVW1M8khwGnzrFOStAgGcZjrYOCHcxh/HvAnVXUfQHd/KfC+rv/9wDuramfX/0hVfWYAdUqSFsh8VyYHJtkMHACsAk6aw7bH0VuZ9JsA3pVkObC8qu7d006SrAPWAaxevXoOLy9JGrT5rkyerKqxqjoWOBW4JklmuW2AmqZtqr4pVdXGqhqvqvGVK1fOtm5J0gJoPsxVVV8HVgArk1zSnZjfPMMmdwDjk9rWAnd2h7YeT3Jka12SpMXTHCZJjgX2AX5QVeu7FcvYDJtcDlyUZE23/Rp650mu6PovBT6R5OCu/+DukJYkaUS1njOB3qGps6pq1zRjP5DkPbufVNVhSS4AbkiyH/A0cH5V7d7fVcAy4BtJnu76r0CSNLJSNatTFCNtfHy8JiYmhl2GJC0pSTZV1eTTDvPiO+AlSc0ME0lSM8NEktTMMJEkNTNMJEnNDBNJUjPDRJLUzDCRJDUzTCRJzQwTSVIzw0SS1MwwkSQ1M0wkSc0ME0lSM8NEktTMMJEkNTNMJEnNDBNJUrNZh0mSXUk2J7ktyS1JXjnD2BOT3JxkW3db19e3Icl3u33dmeT0Sdu+t9vm9u61PtJ9V7wkaUTNZWXyZFWNVdWLgYuAS6calORXgWuBs6vqWOBE4A+S/GbfsCuragx4M/DJ3WGR5GzgdcDLq+qFwK8DO4AD5/ZjSZIW03wPcx0M/HCavncBn66qWwCq6mHgfODCyQOr6m7gCeA5XdN64B1V9aOu/ydVdVlV7ZxnnZKkRbDvHMYemGQzcACwCjhpmnHHAZ+Z1DbRtf+cJGuBu6tqR5LlwLKqum82xXSHztYBrF69elY/gCRpYcznMNexwKnANUkyxbgANUV7f9u5SbYDNwEbptouySndeZX7pzo/U1Ubq2q8qsZXrlw5hx9DkjRo8zrMVVVfB1YAK5Nc0v3R39x13wGMT9rkBODOvudXVtUxwFvphdIB3aGsx5Mc0b3GF7rzKluB/edTpyRpccwrTJIcC+wD/KCq1ncrlrGu+xPA25OMdWOfC3wI+PDk/VTVZ+kdAjura7oUuCrJod22oXdYTZI0wuZzzgR6h6TOqqpdkwdV1YNJzgCu7s6DBPhoVd0wzX4/CFyb5GrgKuAg4KYkTwGPATcCt86hTknSIkvVVKc3lpbx8fGamJgYdhmStKQk2VRVk09LzIvvgJckNTNMJEnNDBNJUjPDRJLUzDCRJDUzTCRJzQwTSVIzw0SS1GyveNNikkeB7cOuYxZWAA8Pu4g9sMbBWQp1WuPgLIU6J9f4r6tqIJ+UO5ePUxll2wf1Ls6FlGRi1Ou0xsFZCnVa4+AshToXskYPc0mSmhkmkqRme0uYbBx2AbO0FOq0xsFZCnVa4+AshToXrMa94gS8JGm49paViSRpiAwTSVKzJR8mSU5Nsj3JPUkuHMLr35/k9iSbk0x0bb+S5EtJ7u7un9M3/qKu1u1JTulrP6Hbzz1JPtZ9ZfF8
a/rLJDuSbO1rG1hNSX45yfVd+01J1gywzg1JvtvN5+YkbxhmnUkOT/IPSb6Z5I4k/6lrH5n5nKHGkZnLJAckuTnJbV2N/2XU5nEPdY7MXPbtf58ktyb5u+75cOeyqpbsjd730N8LHAnsD9wG/Noi13A/sGJS24eBC7vHFwIf6h7/WlfjLwNHdLXv0/XdDLyC3tcc/1/g9Q01vRpYC2xdiJqAdwJ/0T1+G3D9AOvcAJw3xdih1AmsAtZ2j5cDd3W1jMx8zlDjyMxlt79l3eP9gJuAl4/SPO6hzpGZy77Xfi9wLfB3o/A7vmh/dBfi1k3CF/qeXwRctMg13M8vhsl2YFX3eBW9N1X+Qn3AF7qfYRWwra/9dOCTjXWt4ef/SA+spt1jusf70ntHbQZU53S/tEOts2//fwucPKrzOanGkZxL4CDgFuBlIz6P/XWO1FwChwFfBk7iX8JkqHO51A9zvQD4Tt/zB7q2xVTAF5NsSrKua3t+VT0I0N0/r2ufrt4XdI8ntw/SIGv62TZV9QzwCPDcAdZ6TpIt6R0G271UH3qd3VL/JfT+tTqS8zmpRhihuewOy2wGdgBfqqqRnMdp6oQRmkvgo8D5wE/72oY6l0s9TKY6r7DY1zq/qqrWAq8H3pXk1TOMna7eYf4c86lpIeu9Cvg3wBjwIHDFHl5zUepMsgz438B7qmrnTEOnec0Fr3OKGkdqLqtqV1WN0ftX9UuTHD/D8KHN4zR1jsxcJnkjsKOqNs12k2leb6A1LvUweQA4vO/5YcD3FrOAqvped78D+BzwUuD7SVYBdPc7uuHT1ftA93hy+yANsqafbZNkX+AQ4J8HUWRVfb/7Zf4pcDW9+RxqnUn2o/dH+n9U1We75pGaz6lqHMW57Or6EfAV4FRGbB6nq3PE5vJVwG8luR+4DjgpyX9nyHO51MPkG8DRSY5Isj+9E0X/Z7FePMm/SrJ892PgdcDWroazumFn0TuGTdf+tu5KiSOAo4GbuyXpo0le3l1NcWbfNoMyyJr69/UW4O+rO7jaavcvQ+e36c3n0Ors9vkp4JtV9ZG+rpGZz+lqHKW5TLIyyaHd4wOBfw9sY4TmcaY6R2kuq+qiqjqsqtbQ+5v391V1BsOey/menBqVG/AGelev3AusX+TXPpLeVRK3AXfsfn16xxa/DNzd3f9K3zbru1q303fFFjBO73/Qe4E/o+3E4f+ktxR/mt6/MP7jIGsCDgD+F3APvatBjhxgnX8N3A5s6f6HXjXMOoET6S3vtwCbu9sbRmk+Z6hxZOYSeBFwa1fLVuDiQf+uDOi/93R1jsxcTqr3tfzLCfihzqUfpyJJarbUD3NJkkaAYSJJamaYSJKaGSaSpGaGiSSpmWEiSWpmmEiSmv1/wV1EjdbItnwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "label_distribution(val_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD4CAYAAAAHHSreAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQm0lEQVR4nO3de5DdZX3H8fdHSIRMAqhBm+HSwEChghpgKyIqjlPrpah01BFmEJx2TNXiVKxyMQ6magpeUOvdWK11Wgt26g1nOuqoqON0wI0ECJAQqAgoNUQrIdwE/PaP/WU42Wc32SRn9+zl/ZrZ2XOe5/mdfM83M/nk+f3OJVWFJEm9HjfoAiRJ04/hIElqGA6SpIbhIElqGA6SpMbegy6gHxYvXlxLly4ddBmSNKOsWbNmc1UdONbcrAiHpUuXMjw8POgyJGlGSfLz8eY8rSRJahgOkqTGrDitdNOdv+aEt39x0GVI0pRa84GzJu2x3TlIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySpMe3CIcnBSb6eZGOSW5P8Y5L5g65LkuaSaRUOSQJ8BfhaVR0J/BGwEFg10MIkaY6ZVuEAvAB4sKr+GaCqHgXOBf4yyYKBViZJc8h0C4djgDW9A1W1BbgdOKJ3PMnyJMNJhh+5/94pLFGSZr/pFg4BaiLjVbW6qoaqamjvBYumpDhJmiumWzjcAAz1DiTZDzgEuHUgFUnSHDTdwuG7wIIkZwEk2Qu4FPhCVd0/0MokaQ6ZVuFQVQX8BfDqJBuBm4EHgXcMtDBJmmOm3deEVtUdwMsGXYckzWXTaucgSZoeDAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1pt07pHfHHx/8JIY/cNagy5CkWcOdgySpYThIkhqGgySpYThIkhqGgySpYThIkhqz4qWsv7vrBm5/99Mm7fEPvej6SXtsSZqO3DlIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhp9C4ckW8cZX5nkF0nWJlmX5OVjjG/7OSDJ85Pck+SaJOuTfLBfNUqSJmaqdg4frqplwKuBzyd5XO94z89vu/EfVdVxwHHAqUlOnqI6JUlM8WmlqroJeARYPMH1DwBrgYMmsSxJ0ihTGg5JTgR+D9zdDZ3bc0rp+2OsfwJwJPDDMeaWJxlOMvyb+x6d1Lolaa6Zqu9zODfJmcC9wGuqqpLAyGmlsa4pPDfJdcBRwCVV9b+jF1TVamA1wNMP2rcmr3RJmnv6vnNIsmrbbqBneNu1hedW1Y8m8DA/qqqnA08D3phkWb/rlCSNr+/hUFUrtl1g7sNj3QxcDJy/x4VJkiZs0O9zOHfUS1mXjrHm08Dzkhw2xbVJ0pzVt2sOVbVwnPGVOxgfa+424MqedQ/gq5UkaUoNeucgSZqGDAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUmOqPnhvUs1fcgyHXjQ86DIkadZw5yBJahgOkqSG4SBJahgOkqSG4SBJahgOkqTGrHgp6/pN6zn5Yyfv9vE/fvOP+1iNJM187hwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSY2+hEOSreOMr0zytjHGT0tyXZL1Sa5Pctqo+bd1c+uSXJvkrH7UKUmamCn/4L0kzwA+CLywqn6W5DDgO0n+p6quS/IG4IXAM6tqS5L9gdOmuk5JmssGcVrpbcA/VNXPALrfFwNv7+bfAbypqrZ08/dU1b8MoE5JmrMGEQ7HAGtGjQ0DxyRZBCyqqlt39iBJlicZTjL88NaHJ6NOSZqzBhEOAWqcsbHmxlRVq6tqqKqG5i2c1+cSJWlu62s4JFmVZG2StTtYdgMwNGrseODG7lTSfUkO72ddkqRd09dwqKoVVbWsqpbt
YNkHgQuTLAXofr8DuLSbvxj4RJL9uvn9kizvZ52SpB2bilcrvTPJW7bdqaqDk5wPXJFkHvAwcF5Vre2WfApYCPwkycPd/KVIkqZMqiZ0in9aW3jownrG25+x28f7HdKS5qIka6pq9Gl+wHdIS5LGYDhIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhpT/n0Ok+HoJx/tG9kkqY/cOUiSGoaDJKlhOEiSGoaDJKlhOEiSGoaDJKlhOEiSGrPifQ73btjAD553ym4ff8oPf9DHaiRp5nPnIElqGA6SpIbhIElqGA6SpIbhIElqGA6SpIbhIElqGA6SpIbhIElqGA6SpMZuh0OSrTuYe06Sq5Os736W98ytTPKLJGuT3JjkjFHHvrU75vok1yb5UJJ5u1unJGnX9X3nkOQPgC8Bb6iqo4HnAH+d5M97ln24qpYBrwA+s+0f/yRvAP4MeFZVPQ34E2ATsG+/65QkjW8yTiv9DfCFqvopQFVtBs4DLhi9sKo2AvcDT+iGVgBvrKrfdvO/q6pLqmrLJNQpSRrHZITDMcCaUWPD3fh2khwPbKyqTUkWAQur6mcT+UOSLE8ynGT4nocf3uOiJUmPmYxwCFBjjPeOnZtkA3AVsHKs45K8qLsucVuSZzcPVrW6qoaqamj/eV6SkKR+2uNwSLKq+0d8bTd0AzA0atkJwI099z9cVUcBrwG+mGSf7tTRfUkOA6iqb3XXJdYB8/e0TknSxO1xOFTViqpa1v1DDvAJ4HVJlgEkeRLwPuD9Yxz7FUZOOZ3dDV0MfCrJAd2xAfbZ0xolSbum798EV1V3JTkT+Gx3HSHAR6rqinEOeTfwpSSfBT4FLACuSvIQsBX4MXBNv+uUJI0vVWNdHphZjlq0qFYfd/xuH+/XhEqai5KsqarRlwEA3yEtSRqD4SBJahgOkqSG4SBJahgOkqSG4SBJahgOkqSG4SBJahgOkqRG3z8+YxAWHXWU73KWpD5y5yBJahgOkqSG4SBJahgOkqSG4SBJahgOkqTGrHgp66Y77+HjfzfeF8095pxLXzYF1UjSzOfOQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSY1dDockW8cZX5mkkhzRM3ZuNzbU3b8tyeLu9ookNyS5LsnaJCd24/OSXJJkY5J1Sa5O8pLde3qSpN3R789Wuh44HXhvd/9VwI2jFyU5CTgVOL6qHuoCY343/R5gCXBsN/cU4JQ+1ylJ2oF+n1b6GvAKgCSHA/cAd4+xbgmwuaoeAqiqzVX1yyQLgNcDb+6Z+1VVfbnPdUqSdqDf4bAFuCPJscAZwOXjrPs2cEiSm5N8Msm2ncERwO1VtWVnf1CS5UmGkwxvvf+evhQvSRoxGRekL2Pk1NJpwFfHWlBVW4ETgOWM7CwuT/K6XflDqmp1VQ1V1dDCBfvvUcGSpO3tdjgkWdVdSF47auoK4LXsZAdQVY9W1ZVV9S7gHOCVwC3AoUkW7W5dkqQ9t9vhUFUrqmpZVS0bNf4AcD6warxjkxyV5MieoWXAz6vqfuBzwEeTzO/WLkly5u7WKUnadZPyTXBVddlOliwEPpbkAOARRnYMy7u5dzLyaqcbkzwI3AdcNBl1SpLGtsvhUFULxxlfOc7483tuL+1ubgaePc763wHndT+SpAHwHdKSpIbhIElqGA6SpIbhIElqGA6SpIbhIElqGA6SpIbhIElqGA6SpMakfHzGVHvywftzzqUvG3QZkjRruHOQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSY6fhkOTRJGuTXJvkp0mePc66lUkqyRE9Y+d2Y0Pd/duSLO5ur0hyQ5Lrusc/sRufl+SSJBuTrEtydZKX9OfpSpImYiLfIf1AVS0DSPIi4GLglHHWXg+cDry3u/8q
4MbRi5KcBJwKHF9VD3WBMb+bfg+wBDi2m3vKDv48SdIk2NXTSvsB/7eD+a8BrwBIcjhwD3D3GOuWAJur6iGAqtpcVb9MsgB4PfDmnrlfVdWXd7FOSdIemEg47Nud9lkP/BMj/7MfzxbgjiTHAmcAl4+z7tvAIUluTvLJJNt2BkcAt1fVlp0VlWR5kuEkw3ffPVb+SJJ210TC4YGqWlZVRwMvBr6YJDtYfxkjp5ZOA7461oKq2gqcACxnZGdxeZLX7ULdVNXqqhqqqqEDDzxwVw6VJO3ELp1Wqqr/BhYDByZZ1e0o1o5adgXwWnayA6iqR6vqyqp6F3AO8ErgFuDQJIt2pS5JUn/tUjgkORrYC/h1Va3odhTLetdU1QPA+cCqHTzOUUmO7BlaBvy8qu4HPgd8NMn8bu2SJGfuSp2SpD0zkVcr7duzOwhwdlU9uqMDquqynTzmQuBjSQ4AHmFkx7C8m3snI692ujHJg8B9wEUTqFOS1CepqkHXsMeGhoZqeHh40GVI0oySZE1VDY015zukJUkNw0GS1DAcJEkNw0GS1DAcJEkNw0GS1DAcJEkNw0GS1DAcJEkNw0GS1DAcJEkNw0GS1JgVH7yX5F5gw6DrmCEWA5sHXcQMYJ8mxj5NzHTt0x9W1ZjfljaRj+yeCTaM98mC2l6SYXu1c/ZpYuzTxMzEPnlaSZLUMBwkSY3ZEg6rB13ADGKvJsY+TYx9mpgZ16dZcUFaktRfs2XnIEnqI8NBktSY8eGQ5MVJNiS5JckFg65nKiT5fJJNSdb1jD0xyXeSbOx+P6Fn7sKuPxuSvKhn/IQk13dzH02SbvzxSS7vxq9KsnRKn2CfJDkkyfeT3JTkhiR/243bqx5J9klydZJruz79fTdun8aQZK8k1yT5Znd/dvapqmbsD7AXcCtwODAfuBZ46qDrmoLn/TzgeGBdz9j7gQu62xcA7+tuP7Xry+OBw7p+7dXNXQ2cBAT4L+Al3fibgE93t08HLh/0c97NPi0Bju9uLwJu7vphr7bvU4CF3e15wFXAs+zTuP16K/Al4Jvd/VnZp4E3eg//kk4CvtVz/0LgwkHXNUXPfemocNgALOluL2HkjYFNT4BvdX1bAqzvGT8D+Ezvmu723oy8szODfs596NnXgRfaqx32aAHwU+BE+zRmfw4Gvgu8oCccZmWfZvpppYOAO3ru39mNzUVPqaq7ALrfT+7Gx+vRQd3t0ePbHVNVjwD3AE+atMqnQLc9P46R/xXbq1G6UyVrgU3Ad6rKPo3tI8B5wO97xmZln2Z6OGSMMV+bu73xerSj3s2qviZZCPwn8Jaq2rKjpWOMzYleVdWjVbWMkf8ZPzPJsTtYPif7lORUYFNVrZnoIWOMzZg+zfRwuBM4pOf+wcAvB1TLoP0qyRKA7vembny8Ht3Z3R49vt0xSfYG9gd+M2mVT6Ik8xgJhn+rqq90w/ZqHFX1W+BK4MXYp9FOBl6e5DbgMuAFSf6VWdqnmR4OPwGOTHJYkvmMXMD5xoBrGpRvAGd3t89m5Pz6tvHTu1dBHAYcCVzdbX/vTfKs7pUSZ406ZttjvQr4XnUnQWeS7nl9Dripqj7UM2WveiQ5MMkB3e19gT8F1mOftlNVF1bVwVW1lJF/a75XVWcyW/s06As8fbhA9FJGXoVyK7Bi0PVM0XP+d+Au4GFG/qfxV4ycl/wusLH7/cSe9Su6/myge1VENz4ErOvmPs5j75jfB/gP4BZGXlVx+KCf82726TmMbMmvA9Z2Py+1V02fng5c0/VpHXBRN26fxu/Z83nsgvSs7JMfnyFJasz000qSpElgOEiSGoaDJKlhOEiSGoaDJKlhOEiSGoaDJKnx/w5UqcCD3Z1IAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "label_distribution(test_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## sentence length distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "def length_distribution(x):\n",
    "    sns.distplot([len(sentence) for sentence in x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\anaconda\\lib\\site-packages\\seaborn\\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD4CAYAAAD2FnFTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAmI0lEQVR4nO3de3RVd5338fc39yuEQLg0hEItpcXa26SArdPRXhSqFnV8ehutOj5iH8t4mRmdOs7j3Jz16Iyrjl12YGit2nrpaNUpU9G2XmqrlZbQVixQaEwLBAIEQu738H3+2Dv0eNhJTiD7nBP4vNY6K+fs/dvnfHdLzie/vX/7t83dERERSZaT6QJERCQ7KSBERCSSAkJERCIpIEREJJICQkREIuVluoCJNGPGDJ8/f36myxARmTQ2b958yN2rotadUgExf/586urqMl2GiMikYWa7RlqnQ0wiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKRFBAiIhJJASEiIpEUECIiEumUupI63b799O7jlt28dF4GKhERmXjqQYiISCQFhIiIRFJAiIhIJAWEiIhEUkCIiEgkBYSIiERSQIiISKRYA8LMlpvZDjOrN7PbI9abmd0Zrt9iZpckrPuEmW01sxfM7DtmVhRnrSIi8odiCwgzywXuAlYAi4GbzGxxUrMVwMLwsQpYE25bDXwUqHX384Fc4Ma4ahURkePF2YNYAtS7e4O79wMPACuT2qwE7vPARqDCzOaE6/KAYjPLA0qAfTHWKiIiSeIMiGpgT8LrxnDZmG3cfS/wRWA30AS0ufujUR9iZqvMrM7M6pqbmyeseBGR012cAWERyzyVNmY2jaB3sQA4Ayg1s/dEfYi7r3P3WnevraqqOqmCRUTkVXEGRCNQk/B6LscfJhqpzdXAy+7e7O4DwA+Ay2KsVUREksQZEJuAhWa2wMwKCE4yr09qsx64JRzNtIzgUFITwaGlZWZWYmYGXAVsj7FWERFJEtt03+4+aGargUcIRiHd6+5bzezWcP1aYANwLVAPdAMfCNc9bWYPAs8Cg8BzwLq4ahURkePFej8Id99AEAKJy9YmPHfgthG2/Xvg7+OsT0RERqYrqUVEJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkUqwBYWbLzWyHmdWb2e0R683M7gzXbzGzS8Lli8zs+YRHu5l9PM5aRUTkD8V2wyAzywXuAq4huPf0JjNb7+7bEpqtABaGj6XAGmCpu+8ALkp4n73AD+OqVUREjhdnD2IJUO/uDe7eDzwArExqsxK4zwMbgQozm5PU5irg9+6+K8ZaRUQkSZwBUQ3sSXjdGC4bb5sbge+M9CFmtsrM6sysrrm5+STKFRGRRHEGhEUs8/G0MbMC4DrgeyN9iLuvc/dad6+tqqo6oUJFROR4cQZEI1CT8HousG+cbVYAz7r7gVgqFBGREcUZEJuAhWa2IOwJ3AisT2qzHrglHM20DGhz96aE9TcxyuElERGJT2yjmNx90MxWA48AucC97r7VzG4N168FNgDXAvVAN/CB4e3NrIRgBNSH46pRRERGFltAALj7BoIQSFy2NuG5A7eNsG03MD3O+kREZGS6klpERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkkgJCREQiKSBERCSSAkJERCIpIEREJJICQkREIikgREQkUqwBYWbLzWyHmdWb2e0R683M7gzXbzGzSxLWVZjZg2b2opltN7PXx1mriIj8odgCwsxygbuAFcBi4CYzW5zUbAWwMHysAtYkrPsy8BN3Pxe4ENgeV60iInK8OHsQS4B6d29w937gAWBlUpuVwH0e2AhUmNkcM5sCXAF8FcDd+929NcZaRUQk
SZwBUQ3sSXjdGC5Lpc1ZQDPwNTN7zszuMbPSqA8xs1VmVmdmdc3NzRNXvYjIaS7OgLCIZZ5imzzgEmCNu18MdAHHncMAcPd17l7r7rVVVVUnU6+IiCSIMyAagZqE13OBfSm2aQQa3f3pcPmDBIEhIiJpEmdAbAIWmtkCMysAbgTWJ7VZD9wSjmZaBrS5e5O77wf2mNmisN1VwLYYaxURkSR5cb2xuw+a2WrgESAXuNfdt5rZreH6tcAG4FqgHugGPpDwFn8BfCsMl4akdSIiErPYAgLA3TcQhEDisrUJzx24bYRtnwdq46wv07799O7I5TcvnZfmSkREjqcrqUVEJJICQkREIikgREQkkgJCREQiKSBERCSSAiJD6g920jswlOkyRERGFOswVznersNdfOrBLTz9cguLZpXzvsvmZ7okEZFI6kGk2f99aCvb9rVz9Xkz2XGgg12HuzJdkohIJAVEGv26/hBP7GzmY1cv5M6bLqasMI/Hth3IdFkiIpEUEGni7nzhJy9SXVHMe5adSUlBHlcsnEHDoS4OtvdmujwRkeMoINLk8Z3NbGls46NXnU1Rfi4AC2eVA7DnSE8mSxMRiaSASJP/+EU9Z0wt4p0Xzz22rKq8kMK8HPYc6c5gZSIi0RQQE6i9Z4CdBzqOW76x4TCbXjnCh//kNRTkvfqfPMeM6mnFNCogRCQLKSAmgLvz6Nb9fPHRHaz48pN89VcvE0xUC939g/ztD3/H7ClFXF9bc9y2NdNK2N/Wy8DQ0XSXLSIyKl0HMQF2He7m8Z3NnF89lTOmFvHPD29j08stfPSqhax74ve8fKiLb31wKcUFucdtWzOthKMO+1p7OHN65G23RUQyItaAMLPlwJcJbhh0j7t/Pmm9heuvJbhh0Pvd/dlw3StABzAEDLp71t4bYseBDnIM3nVxNe+/bD53P9nAFx/dwU+27gdg9ZvO5rKzZ0RuO7eyGAhOVCsgRCSbpBQQZvZ94F7gx+6e0rEQM8sF7gKuIbjH9CYzW+/uibcOXQEsDB9LgTXhz2FvcvdDqXxeJu080MGZ00spys8lJ8f48J+8hjcumsnmXUeonT+Nc8LRSlGmFOUztThf5yFEJOukeg5iDXAz8JKZfd7Mzk1hmyVAvbs3uHs/8ACwMqnNSuA+D2wEKsxsTqrFZ4O2ngGa2npZlBQCi2aXc/PSeaOGw7CZ5YUc7uyPq0QRkROSUkC4+0/d/c+AS4BXgMfM7Ckz+4CZ5Y+wWTWwJ+F1Y7gs1TYOPGpmm81s1Ui1mdkqM6szs7rm5uZUdmdCvRSOWjpn9thBMJLK0gIOd/UdO7EtIpINUh7FZGbTgfcD/xt4juDcwSXAYyNtErEs+RtwtDaXu/slBIehbjOzK6I+xN3XuXutu9dWVVWNvhMxqG/uZEpRHrPKC0/4PWaUFdI7cJTufs3uKiLZI6WAMLMfAE8CJcDb3f06d/8vd/8LoGyEzRqBxHGdc4F9qbZx9+GfB4EfEhyyyjqHO/uZNaWI4Hz7iZleWhC+V99ElSUictJS7UHc4+6L3f3/uXsTgJkVAowyumgTsNDMFphZAXAjsD6pzXrgFgssA9rcvcnMSs2sPPycUuDNwAvj27X0aOnqpzL8gj9R08uC3sfhLp2HEJHskeow188BG5KW/YbgEFMkdx80s9XAIwTDXO91961mdmu4fm34ntcC9QTDXD8Qbj4L+GH4V3ke8G13/0mKtaZNT/8QPQNDJx0Q00rzMRQQIpJdRg0IM5tNcNK42Mwu5tVzBlMIDjeNyt03kBQsYTAMP3fgtojtGoALx3r/TGvpDr7Qp5WcXEDk5eRQUZLPIR1iEpEsMlYP4i0EJ6bnAnckLO8A/jammiaNlvAv/pPtQUBwmKlFPQgRySKjBoS7fwP4hpn9qbt/P001TRpHJjIgSgvY0th20u8jIjJRxjrE9B53/yYw38z+Mnm9u98Rsdlpo6Wrn5KC3GP3dzgZ08sK6RkYortvcAIq
ExE5eWMdYhqeHGikoayntZbukx/BNOzYUFcdZhKRLDHWIab/DH/+Y3rKmVxauvqpriiekPeqKAkuSG/tGZiQ9xMROVmpTtb3rwRDXXuAnxCMMPp4ePjptDQ4dJTW7n5eVz01pfbffnr3qOuHR0K1dqsHISLZIdXrIN7s7p8ys3cSXP38v4BfAKdtQDS19XLUjz9BPVYQjKQoP5fCvBxau9WDEJHskOqV1MMT8l0LfMfdW2KqZ9LY394LwNTikeYqHL9pJQXqQYhI1ki1B/E/ZvYiwSGmj5hZFdAbX1nZ71BHcFFbWeHE3XOpoiRf5yBEJGukOt337cDrgVp3HwC6OP7eDqeVQ+Foo7KiCQ4IHWISkSwxnm+38wiuh0jc5r4JrmfSGJ55tbRgAgOiuICegSE6+wYntGciInIiUh3FdD/wGuB5gntEQ3DfhtM2IA519lFSkEtuzolP851seKjr3iM9LDqJGxCJiEyEVP9MrQUWu255dszhzn5KJ/iv/IpwqOve1m4FhIhkXKqjmF4AZsdZyGRzuLN/wg8DVYQjova2ntbn/0UkS6T6DTcD2GZmzwDH5qR29+tiqWoSONTZN+EBUVaUR26OsfdIz4S+r4jIiUj1G+4f4ixiMjrU2cfMKUUT+p45ZkwtzmdvqwJCRDIv1WGuvwReAfLD55uAZ8fazsyWm9kOM6s3s9sj1puZ3Rmu32JmlyStzzWz58zs4ZT2Jk36Bodo741npFFFST57j3RP+PuKiIxXSgFhZh8CHgT+M1xUDfz3GNvkAncBK4DFwE1mtjip2QpgYfhYBaxJWv8xYHsqNabT8I19YgmI4gL26RyEiGSBVE9S3wZcDrQDuPtLwMwxtlkC1Lt7g7v3Aw9w/MV1K4H7PLARqDCzOQBmNhd4K3BPijWmzeHO4YA4+ftAJKsoyedARy/9g0cn/L1FRMYj1YDoC7/kAQgvlhtryGs1sCfhdWO4LNU2/w58Chj1m9LMVplZnZnVNTc3j1HSxGjunPhpNoZNK8nHHfa3qRchIpmVakD80sz+Fig2s2uA7wH/M8Y2UVeQJYdKZBszextw0N03j1WYu69z91p3r62qqhqr+YQY7kFM9HUQAFOLg2shGlt1HkJEMivVgLgdaAZ+B3wY2AD83RjbNAI1Ca/nAvtSbHM5cJ2ZvUJwaOpKM8uaqcUPx9yDAHQeQkQyLtVRTEcJTkp/xN3f7e53p3BV9SZgoZktMLMC4EZgfVKb9cAt4WimZUCbuze5+6fdfa67zw+3+7m7v2cc+xWrQ519FOXnUJCXar6mbnj6cF0LISKZNuo3XPjF/Q9mdgh4EdhhZs1m9tmx3tjdB4HVwCMEI5G+6+5bzexWM7s1bLYBaADqgbuBj5zEvqTN4c5+ppcWYjZx8zANy8vNYWZ5IXt1iElEMmysYyQfJzjcc6m7vwxgZmcBa8zsE+7+pdE2dvcNBCGQuGxtwnMnGCE12ns8Djw+Rp1pdairnxllBWM3PEFnVBTrYjkRybixjpHcAtw0HA4A7t4AvCdcd1pq7e4/NrFeHKqnFeschIhk3FgBke/uh5IXunszr96G9LTT2j1w7GRyHOaGPYijRzV5rohkzliHmEa7QfJpe/PkIzH3IPa1BRfK3f1kA+VFQRDdvHRebJ8nIhJlrIC40MzaI5YbMLEz1U0Sg0NH6egdPHZznzgMT/vd2j1wLCBERNJt1IBw94mfS2KSa+8dBF79Eo/DtNKgd9LS3U9NZUlsnyMiMpqJH8h/ijvSHRxZi/MQU2X43sOTAoqIZIICYpxauwcAYj3EVJCXQ3lhngJCRDJKATFOrWnoQQBUlhYoIEQkoxQQ43SsBxHjOQhQQIhI5ikgxqm1JwiIaWnoQbT3DDA4pPtCiEhmKCDGqbW7nxyD8qKJn8k1UWVpAQ4cCXssIiLppoAYp9buAaYW55OTM/ET9SWqHB7q2tUX6+eIiIxEATFOrT0DsZ+ghsSA0HkIEckMBcQ4tXb3
H7tnQ5zKCvMoyM1RQIhIxiggxinuifqGmZlGMolIRsUaEGa23Mx2mFm9md0esd7M7M5w/RYzuyRcXmRmz5jZb81sq5n9Y5x1jkfcE/Ulml5WQHOnAkJEMiO2gDCzXOAuYAWwGLjJzBYnNVsBLAwfq4A14fI+4Ep3vxC4CFge3pI049rCk9TpMLO8kJauPg11FZGMiLMHsQSod/cGd+8HHgBWJrVZCdzngY1AhZnNCV93hm3yw0fGb44wMHSUjr7B2K+BGDZzShFHHZo7NZJJRNIvzoCoBvYkvG4Ml6XUxsxyzex54CDwmLs/HfUhZrbKzOrMrK65uXmiao/U1hP/PEyJZpUHM6ofbFdAiEj6xRkQURcKJPcCRmzj7kPufhEwF1hiZudHfYi7r3P3WnevraqqOpl6x5SOifoSzSgrIMfgQIduPyoi6RdnQDQCNQmv5wL7xtvG3VuBx4HlE17hOLX1pGeivmF5uTlMLyvkgHoQIpIBcQbEJmChmS0wswLgRmB9Upv1wC3haKZlQJu7N5lZlZlVAJhZMXA18GKMtabkSFd6JupLNKu8kIPt6kGISPrFNqGQuw+a2WrgESAXuNfdt5rZreH6tcAG4FqgHugGPhBuPgf4RjgSKgf4rrs/HFetqUrXRH2JZk4pYuu+dnoHhijK1w3+RCR9Yp1xzt03EIRA4rK1Cc8duC1iuy3AxXHWdiKG7wUxNU3nIABmTSnCgfqDnZxfPTVtnysioiupx6G1eyCYybUw3plcE82eEoxk2ravPW2fKSICCohxae0JrqKOeybXRNPLCijKz+G5PUfS9pkiIqCAGJcj3QNpPUENkGPGvMoSNu9SQIhIeikgxqGteyBt10Akqqks4aWDnbT36uZBIpI+CohxGD7ElG7zKktwh+d3t6b9s0Xk9KWAGIcjXek/xARQM60EM3h2tw4ziUj6KCDGoS1Nd5NLVpSfyzkzy3lWPQgRSSMFRIr6B4/S2TeYkXMQAEsWVLLp5RZ6B4Yy8vkicvpRQKQo3TO5JrvyvJn0DAzxm4bDGfl8ETn9KCBSlO6J+pK9/qzplBTk8rPtBzLy+SJy+lFApOhId/on6ktUlJ/LG86ewc+3HySYoUREJF4KiBQN3wsinRP1Jbv6vFnsa+tle1NHxmoQkdOHAiJFwxP1ZeocBMCbzp1Jbo7xP1uSb6shIjLxFBApGu5BpHMm12RV5YW8adFMHtzcyODQ0YzVISKnBwVEilp7+snNsbTO5BrlhktraO7o4xc74r3/toiIAiJFwxP1maVvJtcob1pURVV5IQ88szujdYjIqS/WgDCz5Wa2w8zqzez2iPVmZneG67eY2SXh8hoz+4WZbTezrWb2sTjrTEVb90BGDy8Ny8vN4YbaGn6+4yANzZ2ZLkdETmGxBUR4u9C7gBXAYuAmM1uc1GwFsDB8rALWhMsHgb9y9/OAZcBtEdumVWtPf0ZHMCV632XzKcjNYd0TDZkuRUROYXH2IJYA9e7e4O79wAPAyqQ2K4H7PLARqDCzOe7e5O7PArh7B7AdqI6x1jG1dA0wLQt6EBCcrL6+tobvP9vI/rbeTJcjIqeoOAOiGtiT8LqR47/kx2xjZvMJ7k/9dNSHmNkqM6szs7rm5vhO3LZ09VFZmh09CIBVV5zFUYc1j9dnuhQROUXFGRBRZ3OTLwEetY2ZlQHfBz7u7pE3ZXb3de5e6+61VVVVJ1zsaNydlq5+KksLY3n/E1FTWcL1tTV8+5nd7GnpznQ5InIKinPMZiNQk/B6LpB8hdeIbcwsnyAcvuXuP4ixzjG19w4yMOTMKMtcD+LbTx8/amleZQk5Ztzx2E6+dMNF6S9KRE5pcQbEJmChmS0A9gI3AjcntVkPrDazB4ClQJu7N1kwlvSrwHZ3vyPGGlPS0hVcRZ1Nh5gAphbn8/7L5/Ofv2zgfZfN56KailHbR4UMwM1L551UHXG9r4hkVmyHmNx9EFgNPEJwkvm77r7VzG41s1vD
ZhuABqAeuBv4SLj8cuC9wJVm9nz4uDauWsfS0tUHZF9AAKx+09nMKCvkH9Zv5ehRTeInIhMn1suC3X0DQQgkLlub8NyB2yK2+xXR5ycy4lBn0IOYnkXnIIaVF+Vz+4pz+evv/ZYHn23k+tqasTcSEUmBrqROwfAhpukZPAcxmnddXE3tmdP4lx9t52CHhr2KyMRQQKQgW89BDMvJMb7w7gvoGRji7x/aqvtFiMiEUECk4HBnP6UFuRTl52a6lBG9pqqMT1x9Dj9+YT/frdsz9gYiImNQQKTgcFcf08uy7/xDslVXnMUbzp7BZx/ayvamyMtGRERSpoBIQXCRXHYeXkqUm2N86YaLmFqcz59/fRN7W3syXZKITGKZvbnBJHG4s585U4syXUakqGsQbri0hq8/9QrvvedpvvHnS6ipLMlAZSIy2akHkYLJ0oMYNmdqMV97/6Uc7urnuq/8ike27k/5xHV77wCvHOpi2752Nu9qYdMrLezY38G+1h56B4ZirlxEsol6EGNw90lzDiJR7fxKHrrtcj58/2Y+fP9mLqypYHZ5ITOnFJGXY3T2DdLeO0hbzwA/f/EAjUd62NvaQ0fv4KjvO6OskHmVxcyrLKEmfDQ0d1JZWsCU4nxyMnxDJRGZOAqIMXT0BfMwTZ9EPYhh82eU8vBH38D36hq57zev8Mi2A8e1KczLYf70UuZOK2bJgkqqK4qZUVZIaWEuxQV5GNDRO0h77wCHO/vY09LD7pZu6nYdYf1v95F48XZRfg4100pYNLuc154xNX07KiKxUECMoaUzu6+BGEt+bg43L53HzUvnse6JBlq7+xkYcsoL8ygvzqMwL/eE50waGDpKU2svX3/qFVq6+tnX2sPLh7p4eEsTP9rSxHO7j7D6yrO5YG7FxO6UiKSFAmIMh4cvksvSq6jHo6wwj7LC1P6XpzIBX35uDvOml3D2zLI/aHOoo4/Nu4/wzCstXPeVX/P2C8/gs29bTFX55DpMJ3K6U0CM4XBnMFHfjCych2k0I33Bp8OM8kLe8trZfOXmi7n7yZdZ+/jvefKlZv7t3RdyzeJZGatLRMZHATGG/e3B3Eazpk6ugBiPuMKkvCifv7zmHK678Aw+8V/P86H76vjolWfziWvOwXQyWyTrKSDGsK+1l/xcm3Q9iLicSJicPbOM7936ej770Avc+fN6mjv7+dw7zic3RyEhks0UEGNoauth9tQicvRlNm7JYXLh3AoOtvfxnWd20947wJeuv4iCPF2KI5KtYv3tNLPlZrbDzOrN7PaI9WZmd4brt5jZJQnr7jWzg2b2Qpw1jqWprZc5U4szWcIpw8x482tn85lrz+NHW5pYdX+dLr4TyWKxBYSZ5QJ3ASuAxcBNZrY4qdkKYGH4WAWsSVj3dWB5XPWlqqmtJ2un2ZisPnTFWXz+Xa/jlzubWXX/ZoWESJaKswexBKh39wZ37wceAFYmtVkJ3OeBjUCFmc0BcPcngJYY6xvT0aPOfvUgYnHjknl84V0X8ORLCgmRbBVnQFQDiTcmaAyXjbfNqMxslZnVmVldc3PzCRU6kkNdfQwMOWdUqAcRh+svrVFIiGSxOAMi6qxu8oxxqbQZlbuvc/dad6+tqqoaz6Zj2t8WDHFVD2Jiffvp3cceg0edd15UzRM63CSSdeIcxdQI1CS8ngvsO4E2GbOvdTgg1IOIU+38SpaeVcnffP93rLp/M+ve+0fH7t4XNaz2RKcGEZHxibMHsQlYaGYLzKwAuBFYn9RmPXBLOJppGdDm7k0x1jQuTW3BDXcUEPG74dJ5fOFPX8cTO5v50H11dPaNPqusiMQvtoBw90FgNfAIsB34rrtvNbNbzezWsNkGoAGoB+4GPjK8vZl9B/gNsMjMGs3sg3HVOpKmtl4K83Im7UR9k80Nl87jX//0Ap76/WHeveYpGo90Z7okkdNarBfKufsGghBIXLY24bkDt42w7U1x1paK4BqIIk0LkUbXX1rDrKlFrP7W
s7z1zl/x9gvmsFhTh4tkhK6kHkVTa3AVtcQv+VzDqivO4oFNe/jm07t5XfVU3nrBHKYU5WeoOpHTk+Y5GMWulm5qpul+zpkwvayQD19xFlefN5PtTe38+093srHhMEdTvHWqiJw89SBG0NLVT3NHH4tml2e6lNNWXm4OV547iwuqK/jv3+5l/W/3sXnXEV5XPZULayoyXZ7IKU89iBG8uL8dgHNmKSAybUZ5IR+8fAE31NbQ3jvAO/7j13z6B7+jrXsg06WJnNLUgxjBzv0dAJyrHkRWMDMurKlg0exy9rX28LWnXuGn2w/wzytfy/Lz52S6PJFTknoQI9hxoINpJfm6TWaWKcrP5e/etpiHbrucqrJCbv3ms/yfb27mYEdvpksTOeUoIEbw4v4OFs0u1xDXLHV+9VQeWn05n1q+iJ+9eJBr7niC79XtwXUSW2TCKCAiuDs793ewSOcfslp+bg4feePZ/Phjf8w5s8r45INbuOXeZ9jTogvsRCaCzkFEaDzSQ1f/EItmT8l0KRIhan6mlRdVc91F1Xx+w3be/KUn+ORbFvG+y+brtqYiJ0E9iAg7whPUGuI6eeSY8d5lZ/LoX/4Jy86q5J8e3sa71z7FlsbWTJcmMmmpBxHhyZeaKcrPYfEc9SAmk+GexdXnzaKqvIgfbdnHdV/5NW+/8Az++s3ncOb00gxXKDK5KCCSuDuPbjvAHy+sorggN9PlyAkwMy6qqeDc2eU8+VIzP912gJ+80MSfLT2Tv7jybKaXaWSaSCp0iCnJC3vbaWrr5S2vnZ3pUuQkFeXncs3i2fzyk2/k3X9Uw/0bd/HH//oLPvfwtmM3gxKRkakHkeTRbfvJMbjq3JmZLkUmyE+3H+R11VOZNaWQx3c0c++vX+a+3+ziHRefwQ2XzuOSeRUaziwSQQGRYHDoKD/a0sSSBZVM0z0gTjkzy4u4vraGq8+bxRMvNfPD5/by3bpGZpQVcvG8ChbOLOOv3rxII59EQgqIBF/91cs0HOrik29ZlOlSJEaVpQW846JqVrx2Nr/b28azu4/w2LYDPLbtAN/cuIslCyo5b84UFs0uZ9GscubPKCU/V0dj5fQTa0CY2XLgy0AucI+7fz5pvYXrrwW6gfe7+7OpbDvRtje1c8djO3nz4lksP1/nH04Hhfm51M6vpHZ+JR29AzQ0dwFQt6uFn794kKPhRdn5ucaZ00uZP72EM6eXcmb4c/70EqorislTeMgpKraAMLNc4C7gGqAR2GRm6919W0KzFcDC8LEUWAMsTXHbCTF01Ln7yQbueGwn5YV5/NPK83U8+jRUXpR/bArxC2sqGBg6yqHOPg6097K/rY9DnX28sLedX+5sZmDo1ek8cnOM6opippcVMK2kgIqSfKaVFDClKJ+CvBwK8nIoTPgZPHLJyzVyc4y8nJzwZ/g6N3h+bPlI7cKf+rcqcYqzB7EEqHf3BgAzewBYCSR+ya8E7gtvPbrRzCrMbA4wP4VtJ0R3/yDfeOoV3nhOFZ975/nMLNcd5CSYxmPO1GLmTC2GmleXuzsdfYMc7uynpauPw139tHT109EzyIG2Xrr7h+juH6J/6Gha6jSDqIiICo6RoiQqYyyq9QhvEP35Kb7niG1lPKaXFfLEp9404e8bZ0BUA3sSXjcS9BLGalOd4rYAmNkqYFX4stPMdpxIsRuBu9+XUtMZwKET+Ywso/3IHqfCPoD2I6Psb45blOp+nDnSijgDIuqPgOSpNkdqk8q2wUL3dcC68ZV24syszt1r0/V5cdF+ZI9TYR9A+5FtJmI/4gyIRv6gc85cYF+KbQpS2FZERGIU5/CLTcBCM1tgZgXAjcD6pDbrgVsssAxoc/emFLcVEZEYxdaDcPdBM1sNPEIwVPVed99qZreG69cCGwiGuNYTDHP9wGjbxlXrOKXtcFbMtB/Z41TYB9B+ZJuT3g/THbhERCSKrvAREZFICggREYmkgBgHM1tuZjvMrN7Mbs90Pakwsxoz
+4WZbTezrWb2sXB5pZk9ZmYvhT+nZbrWVJhZrpk9Z2YPh68n3X6EF4Q+aGYvhv9fXj/Z9sPMPhH+e3rBzL5jZkWTZR/M7F4zO2hmLyQsG7F2M/t0+Du/w8zekpmq/9AI+/Bv4b+pLWb2QzOrSFh3QvuggEhRwvQfK4DFwE1mtjizVaVkEPgrdz8PWAbcFtZ9O/Azd18I/Cx8PRl8DNie8Hoy7seXgZ+4+7nAhQT7M2n2w8yqgY8Cte5+PsFAkhuZPPvwdWB50rLI2sPflRuB14bb/Ef4XZBpX+f4fXgMON/dLwB2Ap+Gk9sHBUTqjk0d4u79wPD0H1nN3ZuGJ0B09w6CL6Nqgtq/ETb7BvCOjBQ4DmY2F3grcE/C4km1H2Y2BbgC+CqAu/e7eyuTbD8IRkAWm1keUEJwndKk2Ad3fwJoSVo8Uu0rgQfcvc/dXyYYcbkkHXWOJmof3P1Rdx8MX24kuH4MTmIfFBCpG2lakEnDzOYDFwNPA7PCa04If06GOyT9O/ApIHGio8m2H2cBzcDXwkNl95hZKZNoP9x9L/BFYDfQRHD90qNMon2IMFLtk/X3/s+BH4fPT3gfFBCpS3n6j2xkZmXA94GPu3t7pusZLzN7G3DQ3TdnupaTlAdcAqxx94uBLrL3UEyk8Pj8SmABcAZQambvyWxVsZl0v/dm9hmCQ8vfGl4U0SylfVBApC6VqUOykpnlE4TDt9z9B+HiA+HMuYQ/D2aqvhRdDlxnZq8QHN670sy+yeTbj0ag0d2fDl8/SBAYk2k/rgZedvdmdx8AfgBcxuTah2Qj1T6pfu/N7H3A24A/81cvcjvhfVBApG5STv9hZkZwvHu7u9+RsGo9MDx/7fuAh9Jd23i4+6fdfa67zyf4b/9zd38Pk28/9gN7zGz4toVXEUxjP5n2YzewzMxKwn9fVxGc25pM+5BspNrXAzeaWaGZLSC4d80zGahvTBbcZO1vgOvcvTth1Ynvg7vrkeKDYFqQncDvgc9kup4Ua34DQXdyC/B8+LgWmE4wWuOl8Gdlpmsdxz69EXg4fD7p9gO4CKgL/5/8NzBtsu0H8I/Ai8ALwP1A4WTZB+A7BOdOBgj+uv7gaLUDnwl/53cAKzJd/yj7UE9wrmH493ztye6DptoQEZFIOsQkIiKRFBAiIhJJASEiIpEUECIiEkkBISIikRQQIiISSQEhIiKR/j+lMKFS9YDrcQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "length_distribution(train_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\anaconda\\lib\\site-packages\\seaborn\\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAD7CAYAAABwggP9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAn/ElEQVR4nO3de3Sc9X3n8fdXM9LoZlm2JWMjyxewAZs7UWwnkKQJIbVJFjdt04WEQsjF4RQ2l7abkran22z3bLubLqfhhOK6hCRuAm6bhNRJ3QClJFACxgYMwTYGxfgiLNu62JJ1v333j+cZM8gjaXR5Zkb253XOHM08z++Z52tZmq9+d3N3REREhivIdQAiIpKflCBERCQtJQgREUlLCUJERNJSghARkbSUIEREJK1IE4SZrTGzvWZWb2Z3pTlvZnZPeP5lM7sq5dyXzGyXmb1iZg+ZWXGUsYqIyNtFliDMLAbcC6wFVgA3mdmKYcXWAsvCx3rgvvDaGuDzQJ27XwLEgBujilVERE4Xj/C9VwL17r4PwMw2A+uA3Sll1gGbPJit96yZVZrZ/JTYSsysHygFDo91w6qqKl+8ePEU/hNERM5szz//fLO7V6c7F2WCqAEOpbxuAFZlUKbG3XeY2V8DB4Fu4FF3f3SsGy5evJgdO3ZMLmoRkbOImR0Y6VyUfRCW5tjwdT3SljGzWQS1iyXAuUCZmd2c9iZm681sh5ntaGpqmlTAIiLyligTRANQm/J6Aac3E41U5oPAG+7e5O79wA+Bd6e7ibtvdPc6d6+rrk5bSxIRkQmIMkFsB5aZ2RIzKyLoZN4yrMwW4JZwNNNqoM3dGwmallabWamZGXAtsCfCWEVEZJjI+iDcfcDM7gQeIRiF9IC77zKz28PzG4CtwPVAPdAF3Bae22Zm3wdeAAaAF4GNUcUqIiKnszNpue+6ujpXJ7WISObM7Hl3r0t3TjOpRUQkLSUIERFJSwlCRETSUoIQEZG0opxJLWk8uO3gacc+vmphDiIRERmdahAiIpKWEoSIiKSlBCEiImkpQYiISFpKECIikpYShIiIpKUEISIiaSlBiIhIWkoQIiKSlhKEiIikpQQhIiJpKUGIiEhaShAiIpJWpAnCzNaY2V4zqzezu9KcNzO7Jzz/spldFR6/0Mx2pjzazeyLUcYqIiJvF9ly32YWA+4FrgMagO1mtsXdd6cUWwssCx+rgPuAVe6+F7gi5X3eBB6OKlYRETldlDWIlUC9u+9z9z5gM7BuWJl1wCYPPAtUmtn8YWWuBX7l7gcijFVERIaJMkHUAIdSXjeEx8Zb5kbgoSmPTkRERhVlgrA0x3w8ZcysCLgB+OcRb2K23sx2mNmOpqamCQUqIiKnizJBNAC1Ka8XAIfHWWYt8IK7Hx3pJu6+0d3r3L2uurp6kiGLiEhSlAliO7DMzJaENYEbgS3DymwBbglHM60G2ty9MeX8Tah5SUQkJyIbxeTuA2Z2J/AIEAMecPddZnZ7eH4DsBW4HqgHuoDbktebWSnBCKjPRRWjiIiMLLIEAeDuWwmSQOqxDSnPHbhjhGu7gDlRxiciIiPTTGoREUlLCUJERNJSghARkbSUIEREJC0lCBERSUsJQkRE0lKCEBGRtJQgREQkLSUIERFJSwlCRETSUoIQEZG0lCBERCQtJQgREUlLCUJERNKKdLlvSc/d2fLSYbr7B7m0ZmauwxERSUs1iBx4/VgH295o5dUjJ/netoP8sqEt1yGJiJxGCSLLhtx5dNcRZpUW8vvXXYABj+0+kuuwREROowSRZa82tnO4rYdrl59DRXEhi+aU8tieY7kOS0TkNJEmCDNbY2Z7zazezO5Kc97M7J7w/MtmdlXKuUoz+76ZvWpme8zsXVHGmi07D51gRiLOFbWVACyfX8GexnbePNGd28BERIaJLEGYWQy4F1gLrABuMrMVw4qtBZaFj/XAfSnnvg781N0vAi4H9kQVa7b0
9A+y9+hJlp9bQYEZAMvnVQDw+J6juQxNROQ0UdYgVgL17r7P3fuAzcC6YWXWAZs88CxQaWbzzawCeC/wTQB373P3ExHGmhVPvtZE/6Bz8bkVp45VzUiwaE4pT9c35zAyEZHTRTnMtQY4lPK6AViVQZkaYABoAr5lZpcDzwNfcPfO6MKN3k93HaG4sIDzqsrfdrysKM6LB0/w4LaDbzv+8VULsxmeiMjbRFmDsDTHPMMyceAq4D53vxLoBE7rwwAws/VmtsPMdjQ1NU0m3kgNDjmP7znG8nkVxAre/s+uKk/Q0tnHkA//9oiI5E6UCaIBqE15vQA4nGGZBqDB3beFx79PkDBO4+4b3b3O3euqq6unJPAovNxwgrbufi6YN+O0c1XlRQwOOcc7+3IQmYhIelE2MW0HlpnZEuBN4Ebg48PKbAHuNLPNBM1Pbe7eCGBmh8zsQnffC1wL7I4w1kkZ3jQEpzcP/efrQR/D+dXlp5WtnpEAoLmjlznliQgiFBEZv8gShLsPmNmdwCNADHjA3XeZ2e3h+Q3AVuB6oB7oAm5LeYv/BnzPzIqAfcPOTTv/Wd/MxedWUJ44/VteVZ5MEH1cmO3ARERGEOlaTO6+lSAJpB7bkPLcgTtGuHYnUBdlfNnS2TvACweP86lrlqQ9X1oUo6QwRlNHb5YjExEZmWZSZ8Fzb7TSP+i8Z2n6PhIzo6q8iOaTShAikj+UILLgmX0tFMULqFs8a8Qy1TMSNKsGISJ5RAkiC7bvb+XyBTMpLoyNWKaqPEF7zwC9A4NZjExEZGRKEBHr7hvklTfbqFs8e9RyydFLLR0a6ioi+UEJImIvNZygf9B55yjNSwCVJYUAtHf3ZyMsEZExKUFEbMf+VgDesXD0GkRFmCDaepQgRCQ/KEFEbPv+41x4zgxmlhaOWq48EceA9u6B7AQmIjIGJYgIDQ05Lxw4zjvGaF4CiBUY5cVx2lWDEJE8oQQRof0tnZzsHeCKBZUZla8oLuSkEoSI5AkliAjtaTwJwIqU/R9GU1EcVxOTiOQNJYgI7W5sI15gLDvn9AX60qkoKaRNo5hEJE8oQURo9+F2ls4tJxEfeYJcqoqSQrr7B+kfHIo4MhGRsSlBRGh3Yzsr5mfWvARBExNoLoSI5AcliIg0d/RytL034/4HCDqpAdp71A8hIrmnBBGRPY3tAOOrQSRnU2skk4jkASWIiCQTxPJxNTFpuQ0RyR9KEBF5/WgH1TMSzCoryvia4sICCmOmBCEieUEJIiIHWrtYMqdsXNeYGRXFheqDEJG8EGmCMLM1ZrbXzOrN7K40583M7gnPv2xmV6Wc229mvzSznWa2I8o4o3CgpZOFc0rHfd0MzaYWkTwR2Z7UZhYD7gWuAxqA7Wa2xd13pxRbCywLH6uA+8KvSe939+aoYoxK38AQR9t7WTyBBFGeiHGkXTvLiUjuRVmDWAnUu/s+d+8DNgPrhpVZB2zywLNApZnNjzCmrGjtDDb9WTTOJiaAskSczl41MYlI7kWZIGqAQymvG8JjmZZx4FEze97M1kcWZQRaOoMawOIJJIjy4jjd/YMMDGk2tYjkVmRNTIClOebjKHO1ux82s7nAY2b2qrs/edpNguSxHmDhwoWTiXfKJGsQE+mDKE8E/yVdvdqbWkRyK8oaRANQm/J6AXA40zLunvx6DHiYoMnqNO6+0d3r3L2uurp6ikKfnJaOPmaVFjKzZPRNgtIpKwoSRIeamUQkx6JMENuBZWa2xMyKgBuBLcPKbAFuCUczrQba3L3RzMrMbAaAmZUBHwJeiTDWKdXS2Tuh/geAGcVKECKSHyJrYnL3ATO7E3gEiAEPuPsuM7s9PL8B2ApcD9QDXcBt4eXnAA+bWTLGB939p1HFOtVaOvu4tGbmhK4tC5uY1FEtIrkWZR8E7r6VIAmkHtuQ8tyBO9Jctw+4PMrYojIwOERbV/+EaxDJPgjVIEQk1zSTeoq19wzgQM2skgldn4gXEC8wJQgR
yTkliCmW3BFu/sziCV1vZpoLISJ5QQliiiUX2ptXMbEEAUEzk2oQIpJrShBTLLmXwzkTrEEAlCViShAiknMZJQgz+4GZfdjMlFDG0NbdT1G8gBmJiff/lycK6dREORHJsUw/8O8DPg68bmZ/ZWYXRRjTtNbe3U9FcSHhEN0JKQ9rEMEgLxGR3MgoQbj7v7v7J4CrgP0ES1/8wsxuM7PxTxc+g7X3DFBRMrnRw2WJOINDrn0hRCSnMm4yMrM5wCeBzwAvAl8nSBiPRRLZNNXe3c/M4snlzORciJYOLfstIrmT0Z+6ZvZD4CLgH4D/4u6N4al/nI6b+URlyJ32nn4qJrAGU6pTCaKzj/PyY3kpETkLZdoWcn84K/oUM0u4e6+710UQ17TU2TvAkDPpBFGmGoSI5IFMm5j+V5pjz0xlIGeC9u6gz2Bm8eT6IMrD65s6+iYdk4jIRI36SWZm8wg28Ckxsyt5a/+GCmD8mx2c4ZKzqCddgyhSDUJEcm+sP3V/naBjegFwd8rxk8AfRxTTtJWcJFcxyU7qWIFRUhijRTUIEcmhUROEu38H+I6Z/Za7/yBLMU1b7d39FNhbTUSTUZ6I06wahIjk0FhNTDe7+3eBxWb2+8PPu/vdaS47a7X3DFCeiFMwiUlySeXFcdUgRCSnxvpTN7mpQXnUgZwJOnsHpqT2AMFIpuZO1SBEJHfGamL6u/DrV7MTzvTW0Ttwag7DZJUnYhxq7ZqS9xIRmYhMF+v7v2ZWYWaFZva4mTWb2c1RBzfddPQOnBqBNFnliTht3f30DQxNyfuJiIxXpvMgPuTu7cBHgAbgAuC/j3WRma0xs71mVm9md6U5b2Z2T3j+ZTO7atj5mJm9aGY/yTDOnHH3KW9iAmjtVD+EiORGpp9myXGb1wMPuXvrWKuVmlkMuBe4jiCpbDezLe6+O6XYWmBZ+FhFsGrsqpTzXwD2EMy7yGu9A0MMDPmpJqYHtx2c1Psl36e5o5d5k9hbQkRkojKtQfzYzF4F6oDHzawa6BnjmpVAvbvvc/c+YDOwbliZdcAmDzwLVJrZfAAzWwB8GLg/wxhzKrlFaNmU9UG8tR6TiEguZLrc913Au4A6d+8HOjn9w364GuBQyuuG8FimZf4G+DIwLRrhkzvATV0ndViDOKmRTCKSG+P5NFtOMB8i9ZpNo5RP1wY1fAectGXM7CPAMXd/3sx+bbSgzGw9sB5g4cKFoxWNVOcUJ4hTC/ZpqKuI5Eimy33/A3A+sBNI7oXpjJ4gGoDalNcLgMMZlvlt4AYzux4oBirM7LvuftrIKXffCGwEqKury9kWbB3hFqFT1cSUiBdQFC/QZDkRyZlMP83qgBU+vj0wtwPLzGwJ8CZwI8G2pam2AHea2WaCzum2cK+Jr4QPwhrEH6ZLDvmkozdYh6ksEZuS9zMzqssTNGm5DRHJkUwTxCvAPKBxrIJJ7j5gZncCjwAx4AF332Vmt4fnNwBbCUZG1QNdwG3jiD2vdPQOUlxYQLwg4036xjSnvEg1CBHJmUwTRBWw28yeA079SevuN4x2UbjJ0NZhxzakPHfgjjHe42fAzzKMM2c6p3AWddKcsiLVIEQkZzL9RPvzKIM4E0zlMhtJVeUJXj1yckrfU0QkUxl9orn7z81sEbDM3f/dzEoJmo0k1Nk7QPWMxJS+55zyBC0dfbg7Y01MFBGZapmuxfRZ4PvA34WHaoAfRRTTtBRNDaKIvsEh2nsGpvR9RUQykWmP6h3A1UA7gLu/DsyNKqjpZnDI6e4bnLIhrklV5UGNRFuPikguZJogesPlMgAIJ8vlbM5BvunqG8CZuklySXPKiwBo1kgmEcmBTBPEz83sj4ESM7sO+Gfgx9GFNb10TvEkuaQ5ZapBiEjuZJog7gKagF8CnyMYuvqnUQU13XT1hQv1FU1tv31VsgahBftEJAcyHcU0ZGY/An7k7k3RhjT9dPYFNYjSKa5BzC4LEoRqECKSC6PWIMINff7czJqB
V4G9ZtZkZn+WnfCmh6hqEPFYAbNKC2lWghCRHBiriemLBKOX3unuc9x9NsGaSVeb2ZeiDm66SPZBlE7RdqOpknMhRESybawEcQtwk7u/kTzg7vuAm8NzQlCDSMQLiBVM/WS2Kq3HJCI5MlaCKHT35uEHw36IwjTlz0pdEcyBSJpTnlATk4jkxFgJYrQ/XfVnbaizd4DSKe5/SKoqK1KCEJGcGOvP3svNrD3NcSPYyEcIahBTPUkuqao8QXvPAH0DQxTFp24pcRGRsYz6qebuWpAvA519A8yd4oX6kuaEy220dvYxb6Zysohkj/4knQLR9kEkl9tQM5OIZJcSxCT19A/SNzAUXR9EmCC0cZCIZJsSxCSd6Ar3oo5gDgTA3BlBs1JTuxKEiGRXpAnCzNaY2V4zqzezu9KcNzO7Jzz/spldFR4vNrPnzOwlM9tlZl+NMs7JaA3XSSpNRFODmFsR9EEcae+J5P1FREYSWYIwsxhwL7AWWAHcZGYrhhVbCywLH+uB+8LjvcAH3P1y4ApgjZmtjirWyTjeFSaIiGoQiXiM2WVFShAiknVR1iBWAvXuvi/cS2IzsG5YmXXAJg88C1Sa2fzwdUdYpjB85OX+E6dqEBH1QQCcU1HM0TYlCBHJrigTRA1wKOV1Q3gsozJmFjOzncAx4DF33xZdqBOXrEFENYoJYF5FQjUIEcm6KBNEuoWJhtcCRizj7oPufgWwAFhpZpekvYnZejPbYWY7mpqyvxJ5sgZRUhhdDWLezGKOKkGISJZFmSAagNqU1wuAw+Mt4+4ngJ8Ba9LdxN03unudu9dVV1dPMuTxO97ZR0lhLJKF+h7cdpAHtx3kWHsvzR19bHpm/5TfQ0RkJFEmiO3AMjNbYmZFwI3AlmFltgC3hKOZVgNt7t5oZtVmVglgZiXABwn2o8g7rV39kfY/AFSUBOsinuwZiPQ+IiKpIms4d/cBM7sTeASIAQ+4+y4zuz08v4Fg69LrgXqgC7gtvHw+8J1wJFQB8E/u/pOoYp2ME119kfY/AFQUBwmivbs/0vuIiKSK9JPN3bcSJIHUYxtSnjtwR5rrXgaujDK2qdLa2ZeFGkTw39SuGoSIZJFmUk/S8c6+yGZRJ81UDUJEckAJYpJau/oim0WdVFIUI15gShAiklVKEJPQ3TdIT/9QZLOok8yMipJC2nqUIEQke5QgJqE1OUku4j4IgIriOO3d6oMQkexRgpiE453RrsOUamZJIW3d2uVVRLJHCWISkrOoyyLugwCYXVZEW3c//YNDkd9LRASUICYl6pVcU80uK2LI4fCJ7sjvJSICShCTkmxiykYfxOyyYF+Ig61dkd9LRASUICaltaufAoPirCSIYOvRAy1KECKSHUoQk3C8s4/K0iIKbOoX6htuRnGceIFxSDUIEckSJYhJaO3qY1ZpYVbuVWDGrNIi1SBEJGuUICbheGcfs0qLsna/2WVF6oMQkaxRgpiE1s4+ZpVlP0EEaxyKiERLCWISjnf1MTvLNYiO3gGOd2nJDRGJnhLEBLk7xzv7s16DADjQ0pm1e4rI2UsJYoI6+wbpGxxidll2OqlBQ11FJLuUICYoOUkum53Uc8qLKIwZrx45mbV7isjZSwliglpzkCDiBQUsnTuDPY3tWbuniJy9Ik0QZrbGzPaaWb2Z3ZXmvJnZPeH5l83sqvB4rZk9YWZ7zGyXmX0hyjgnoqWzF4CqGYms3nf5fCUIEcmOyBKEmcWAe4G1wArgJjNbMazYWmBZ+FgP3BceHwD+wN2XA6uBO9Jcm1PNHUENYk4WO6kBVsyv4NjJXlo6erN6XxE5+0RZg1gJ1Lv7PnfvAzYD64aVWQds8sCzQKWZzXf3Rnd/AcDdTwJ7gJoIYx23lmSCKM9uglg+vwKAPY3qhxCRaEWZIGqAQymvGzj9Q37MMma2GLgS2Db1IU5cS0cvpUWxrCz1nSqZIHY3tmX1viJy9okyQaRbwW74FOBRy5hZOfAD4Ivunrbh
3czWm9kOM9vR1NQ04WDHq6WzL+u1BwiGup5TkVANQkQiF2WCaABqU14vAA5nWsbMCgmSw/fc/Ycj3cTdN7p7nbvXVVdXT0ngmWju6GVOWXY7qJOWz69g92F1VItItKJMENuBZWa2xMyKgBuBLcPKbAFuCUczrQba3L3RzAz4JrDH3e+OMMYJa+nooyoHNQiAK2oree3YSU50aY9qEYlOZAnC3QeAO4FHCDqZ/8ndd5nZ7WZ2e1hsK7APqAf+Hvi98PjVwO8CHzCzneHj+qhinYiWztzVIN6zrAp3+MWvWnJyfxE5O0Taw+ruWwmSQOqxDSnPHbgjzXX/Sfr+ibzg7rR05KYPAuCyBZWUJ+I89Xoz1186f9zXP7jt4GnHPr5q4VSEJiJnEM2knoD27gEGhpw55bmpQRTGClh93hyerm/Oyf1F5OygBDEBzclZ1DmqQUDQzHSwtYuDWrhPRCKiBDEBpybJ5agPAuDqpVUAPLH3WM5iEJEzmxLEBCSXuchVHwTA+dVlXHxuBZu3H9IOcyISiexOAz5DNHfmZpmNVGbGJ1Yt4o8f/iUvHDzB3jRLgKvjWUQmQzWICWg+GdQgsrndaDo3XHEu5Yk439t2IKdxiMiZSQliAlo6e6ksLSQey+23rzwR5zevquHHLx3maHtPTmMRkTOPEsQEtHT0ZX2Z75F8/tpllCXiPPzimwypL0JEppASxAQ0neylKkdzIIarKk/wZx9ZwcHWLh7bfVQd1iIyZZQgJuBIew/zZhbnOoxTPnplDXWLZvHz15r4l5cO09M/OGJZd6e3f1C1DREZk0YxjZO7c6y9N68ShJnx0StrKCmM8VR9M79saOOieTM4Hi7m13Syl0OtXRw63kXD8W66+gYxgqXDl84tZ+WS2bn9B4hIXlKCGKfWzj76BoeYV5E/CQKCJLH20vlcVlvJk681UX+sgxcPnQCCzuwFs0pYNKeMa5ZWc/hEN70DQxw+0c0LB4+z7Y1W9jSe5KvrLqamsuRt76t1m0TOXkoQ43QkHC2UbwkiqaayhJtWBh/gH72yhoICKIoVEKygHkj90O/uG+TZN1p4ur6Z6+7+OX/4oQu59d2LiRXk7VqJIpIl6oMYp+Rw0nPyqIlpJCVFMRLx2NuSQ7oy779wLo9+6b2sXDKb//mT3fzWfb+g/lhHFiMVkXykGsQ4Nbbldw1iohbMKuVbn3wnW146zP/YsosP3/MUX15zEYl4AQWjJJhUao4SObOoBjFOR9t6MIPqGfkxzHUqmRnrrqjh0S+9l2uWVvEXP9nN3z+1j8a27lyHJiI5oAQxTkfae6gqT1CY41nUUZo7o5j7b63ja799GU0ne/nGf9Tz/ecPaba2yFlGTUzjdKS9l/k57H9I14wTBTPjY3W1tHX388Srx3hufysvHDzBuTOLOdDSySU1M1lSVcbCOaVUFBdmJSYRya5IE4SZrQG+DsSA+939r4adt/D89UAX8El3fyE89wDwEeCYu18SZZzjcbSth4VzSnMdRtaUFsX58GXn8r4L57Lz0AleebONbz29n77BoVNlZpcVsWhOKeVFcS6vreTcYUNlRWR6iixBmFkMuBe4DmgAtpvZFnffnVJsLbAsfKwC7gu/Anwb+AawKaoYJ+JIe89ZObGsPBHnmqVVXLO0it96Rw37mjo50NLFgZZO9rd0sa+pg1/8qoWn6pu58JwZrL1kHnPPsI58kbNNlDWIlUC9u+8DMLPNwDogNUGsAzZ5sIDQs2ZWaWbz3b3R3Z80s8URxjduPf2DtHX359Us6tFE1RyViMdYPr+C5fMr3nb8m0+9wXP7W/n5a8f4xhP1XH/pfG5aWTvqMFsRyV9R9rTWAIdSXjeEx8ZbJm8cCYe4nqO/jNMqKYrxvguq+dIHL+C86rJTQ2aHhrTuk8h0FGUNIt2fjcM/KTIpM/pNzNYD6wEWLox2zP2ZOgcCpra2MaO4kFvetZhHXjnCpmcO0NrZx//7nctJxGNTdg8R
iV6UCaIBqE15vQA4PIEyo3L3jcBGgLq6ukj/VD3Y2gnAorOokzqdTJJJQbg21DXLqvjLf3uVE139/N3vvoOyhAbOiUwXUTYxbQeWmdkSMysCbgS2DCuzBbjFAquBNndvjDCmSXmjuYuiWIFG6YzD5953Pl/77ct4Zl8Ln7h/GyfCFWZFJP9FliDcfQC4E3gE2AP8k7vvMrPbzez2sNhWYB9QD/w98HvJ683sIeAZ4EIzazCzT0cVa6b2N3dSO7tEC9mN08fqavnbT1zF7sPt3LjxWY6d1IQ7kekg0vq+u28lSAKpxzakPHfgjhGuvSnK2CZif0sni+eU5TqMaenXL57Ht257J5/dtIOPbXiGb9+2kiVV+l6K5DM1CGdoaMjZ39LJ1Uurch3KtHX10iq++5lVfPrb21n79Sf5r3W1XDjvraGyWthPJL8oQWTo6MkeevqHWKy/esclXYf2Z645j+9uO8CmZw7wwRXn8L4LqjNeMVZEsufMXXFuir3RHIxgWqImpkmbVVbE5957PpfXVvLY7qN899kDdPUN5DosERlGCSJDB1q6AFhcdXYPcZ0qRfECPvaOBXzksvm8frSDbzxRz85wi1QRyQ9KEBna39xJUbyAc2dqiOtUMTPefX4V6997HgAf2/ALNj+XndVqRWRsShAZeqO5k0WzSynQENcpVzu7lDvfv5R3nV/FXT/8JV975FUtzyGSB5QgMvTa0ZOcX12e6zDOWKVFcb55ax03razl3id+xRf/cSe9A4O5DkvkrKYEkYHjnX3sb+ni8trKXIdyRiuMFfC/P3opX15zIVteOsytDzxHe09/rsMSOWspQWQg2Xl65cLKnMZxpntw20Eeeu4QlSVF/E5dLdvfOM6H7n5SW52K5IgSRAZePHicAoNLa2bmOpSzxhW1ldz67sW0dvXxm3/7C14/ejLXIYmcdZQgMvDioRNcOK9CK5Fm2dK55Xz2PefROzDEb9z7NP/6ct6u4yhyRlKCGMPQkLPz0Ak1L+VITWUJn75mCbPLirjjwRf46L1Pc/+T+3IdlshZQX8Sj2Ffcycnewa4Qh3UOTOzpJDPvvc8fra3iZ/tPcbuxna6+gf51DVLKFetTiQyqkGM4cnXmgCoWzQrx5Gc3eIFBXxw+Tnc+YFlnF9dzt2PvcZ7/s9/cO8T9bR2ao8JkSgoQYzhRzvf5JKaCs7THIi8MK+imJtXL+Jf7riayxZU8rVH9vKuv3ycu37wMnuPqCNbZCqpfj6K+mMdvNzQxp9+eHmuQ5FhLq+t5DufWslrR0/yraf38/CLDWzefoh3nz+HT129hA9cNFez3kUmSQliFA+/2ECBwQ1XnJvrUGSY1GXEL62ZyflVZWw/cJxn97XwmU07mF1WxLvOm8OVtZV8JlzrSUTGRwliBCe6+vjH7Ye4Zlk1c2cU5zocGUNpIs77LqjmmqVV7G5s5+n6Zv71l4382yuNPLOvhRuuOJdfu3AuM0sKcx2qyLQRaYIwszXA14EYcL+7/9Ww8xaevx7oAj7p7i9kcm3Uvvrj3Zzo6ueP1lyYzdvKJMUKjEtrZnJpzUwa27rZeegEuxvbefzVY8QKjCtqK3nPsiresWgWl9bMpLK0KNchi+StyBKEmcWAe4HrgAZgu5ltcffdKcXWAsvCxyrgPmBVhtdGYmjI2fjUPh5+8U0+f+0yLj5Xs6enq/kzS5g/s4Qhdw62dPH6sZO8fqyDr//76yTXil0wq4QlVWXMqyhm3sxiZpUWMaM4zoziQipK4lQUFzKj+K2v8ZjGdcjZI8oaxEqg3t33AZjZZmAdkPohvw7Y5O4OPGtmlWY2H1icwbVTprtvkEPHu3jp0An++fkGnnujlTUXz+PO9y+N4naSZQVmLK4qY3FVGdetCP6/3zzRTfWMBLsOt9FwvJvXjzZz7GQPY60yXhgzyorilBfHuWjeDKpnJKgqT1CeiFNaFKO0KE5ZIkYiHiNWYKc9hm+tGvzop7xOc08/7WC6
UuE9zCgoIOV58DX1tsZbL1KPJ8ueijf1vVLOmbaHnTR3xz34nxw69dxP/V8PhU8Kwv+7ArPwQVa//1EmiBrgUMrrBoJawlhlajK8dkoMDjmXf/VR+gaHgoAqS/iLdRdz8+pF+kU4Q5UUxVg6Nxi2/O7zq04dHxxyevsH6RkYoqd/MOUxRM/AW8+7+gY42TPAmyd62HmojdbO3jETy5nEjFPJIpO9xD1tQktTLoNiGb1Thv8XmcQ1ZTERJoUM33M0byWMIFkUGFTPSPDUlz8wuTdOI8oEke4nZ/i3ZqQymVwbvIHZemB9+LLDzPZmHGEaB4BfALeM77IqoHky980yxRu96Raz4o1WpPHuBeyPJnz5opFORJkgGoDalNcLgMMZlinK4FoA3H0jsHGywU6Gme1w97pcxjAeijd60y1mxRut6RZvUpQ9btuBZWa2xMyKgBuBLcPKbAFuscBqoM3dGzO8VkREIhRZDcLdB8zsTuARgqGqD7j7LjO7PTy/AdhKMMS1nmCY622jXRtVrCIicrpI50G4+1aCJJB6bEPKcwfuyPTaPJbTJq4JULzRm24xK95oTbd4AbDhw+xERERAq7mKiMgIlCAmwczWmNleM6s3s7tyHc9wZlZrZk+Y2R4z22VmXwiPzzazx8zs9fBrXm12YWYxM3vRzH4Svs73eCvN7Ptm9mr4vX5XPsdsZl8Kfx5eMbOHzKw43+I1swfM7JiZvZJybMQYzewr4e/hXjP79TyJ92vhz8TLZvawmVXmS7yZUoKYoJTlQNYCK4CbzGxFbqM6zQDwB+6+HFgN3BHGeBfwuLsvAx4PX+eTLwB7Ul7ne7xfB37q7hcBlxPEnpcxm1kN8Hmgzt0vIRgEciP5F++3gTXDjqWNMfyZvhG4OLzmb8Pfz2z6NqfH+xhwibtfBrwGfAXyJt6MKEFM3KmlRNy9D0guB5I33L0xufihu58k+OCqIYjzO2Gx7wC/kZMA0zCzBcCHgftTDudzvBXAe4FvArh7n7ufII9jJhicUmJmcaCUYI5RXsXr7k8CrcMOjxTjOmCzu/e6+xsEoyJXZiPOpHTxuvuj7j4QvnyWYD4X5EG8mVKCmLiRlgnJS2a2GLgS2AacE843Ifw6N4ehDfc3wJeBoZRj+RzveUAT8K2wWex+MysjT2N29zeBvwYOAo0Ec48eJU/jHWakGKfD7+KngH8Ln0+HeAEliMnIeDmQXDOzcuAHwBfdvT3X8YzEzD4CHHP353MdyzjEgauA+9z9SqCT3DfPjChst18HLAHOBcrM7ObcRjVpef27aGZ/QtDc+73koTTF8ibeVEoQE5fJUiI5Z2aFBMnhe+7+w/Dw0XDVXMKvx3IV3zBXAzeY2X6CJrsPmNl3yd94Ifg5aHD3beHr7xMkjHyN+YPAG+7e5O79wA+Bd5O/8aYaKca8/V00s1uBjwCf8LfmFORtvMMpQUxc3i8HYmZG0Da+x93vTjm1Bbg1fH4r8C/Zji0dd/+Kuy9w98UE38//cPebydN4Adz9CHDIzJI7S11LsCx9vsZ8EFhtZqXhz8e1BH1T+RpvqpFi3ALcaGYJM1tCsL/MczmI720s2PTsj4Ab3L0r5VRexptWsC65HhN5ECwT8hrwK+BPch1PmviuIai6vgzsDB/XA3MIRoG8Hn6dnetY08T+a8BPwud5HS9wBbAj/D7/CJiVzzEDXwVeBV4B/gFI5Fu8wEMEfST9BH9xf3q0GIE/CX8P9wJr8yTeeoK+huTv3oZ8iTfTh2ZSi4hIWmpiEhGRtJQgREQkLSUIERFJSwlCRETSUoIQEZG0lCBERCQtJQgREUlLCUJERNL6/8SmXas99v0mAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "length_distribution(val_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\anaconda\\lib\\site-packages\\seaborn\\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
      "  warnings.warn(msg, FutureWarning)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAndklEQVR4nO3de3RddZ338ff35J40TdomvSVt0xuXUm4l0iqiKKItOnQeHWcVRJTRp6LFUefioM56luPMmtFnZvmMrAcpF1FQEUdUnoodES+AKK0tAqX3hrTQtGmS3nJt7t/nj7NTTkOSc9Kcfc5J+nmtdVbO2fu3z/7uNjnf87tuc3dERERGEkl3ACIikvmULEREJC4lCxERiUvJQkRE4lKyEBGRuLLTHUAylZWVeVVVVbrDEBEZN55//vmj7l4er9yEShZVVVVs3bo13WGIiIwbZvZqIuXUDCUiInEpWYiISFyhJgszW2lme8ysxszuGGK/mdmdwf5tZrYs2H6+mb0Y82gxs8+GGauIiAwvtD4LM8sC7gKuA+qALWa2wd13xhRbBSwOHsuBu4Hl7r4HuCzmfQ4BPw0rVhERGVmYNYsrgRp3r3X3buARYPWgMquBhzxqE1BqZrMGlbkWeMXdE+qEERGR5AszWVQAB2Ne1wXbRltmDfCD4U5iZmvNbKuZbW1qahpDuCIiMpwwk4UNsW3wErcjljGzXOAG4EfDncTd73X3anevLi+PO1RYRETOQpjJog6YE/O6Ejg8yjKrgD+5e0MoEYqISELCTBZbgMVmNj+oIawBNgwqswG4JRgVtQJodvf6mP03MkITlIiIpEZoo6HcvdfMbgeeALKAB9x9h5ndFuxfD2wErgdqgA7g1oHjzayQ6EiqT4QVYyZ4ePNrQ26/afncFEciIjK8UJf7cPeNRBNC7Lb1Mc8dWDfMsR3AtDDjExGRxGgGt4iIxKVkISIicSlZiIhIXEoWIiISl5KFiIjEpWQhIiJxKVmIiEhcShYiIhKXkoWIiMSlZCEiInEpWYiISFxKFiIiEpeShYiIxKVkISIicSlZiIhIXEoWIiISl5KFiIjEpWQhIiJxKVmIiEhcShYiIhKXkoWIiMQVarIws5VmtsfMaszsjiH2m5ndGezfZmbLYvaVmtmjZrbbzHaZ2ZvDjFVERIYXWrIwsyzgLmAVsAS40cyWDCq2ClgcPNYCd8fs+wbwC3e/ALgU2BVWrCIiMrLsEN/7SqDG3WsBzOwRYDWwM6bMauAhd3dgU1CbmAW0A28DPgrg7t1Ad4ixJs3Dm18bcvtNy+emOBIRkeQJsxmqAjgY87ou2JZImQVAE/BtM3vBzO43s6KhTmJma81sq5ltbWpqSl70IiJyWpjJwobY5gmWyQaWAXe7++VEaxpv6PMAcPd73b3a3avLy8vHEq+IiAwjzGRRB8yJeV0JHE6wTB1Q5+6bg+2PEk0eIiKSBmEmiy3AYjObb2a5wBpgw6AyG4BbglFRK4Bmd6939yPAQTM7Pyh3LWf2dYiISAqF1sHt7r1mdjvwBJAFPODuO8zstmD/emAjcD1QA3QAt8a8xaeB7weJpnbQPhERSaEwR0Ph7huJJoTYbetjnjuwbphjXwSqw4xPREQSoxncIiISl5KFiIjEpWQhIiJxKVmIiEhcShYiIhKXkoWIiMSlZCEiInEpWYiISFxKFiIiEpeShYiIxKVkISIicSlZiIhIXEoWIiISl5KFiIjEpWQhIiJxKVmIiEhcShYiIhKXkoWIiMSlZCEiInEpWYiISFxKFiIiEleoycLMVprZHjOrMbM7hthvZnZnsH+bmS2L2XfAzF42sxfNbGuYcYqIyMiyw3pjM8sC7gKuA+qALWa2wd13xhRbBSwOHsuBu4OfA97h7kfDijGTdPb0sau+hdmlBUwvzkt3OCIiZwgtWQBXAjXuXgtgZo8Aq4HYZLEaeMjdHdhkZqVmNsvd60OMK+Mcb+/m
oecO0NjaBcDlc0r50Ip5aY5KROR1YTZDVQAHY17XBdsSLePAL83seTNbO9xJzGytmW01s61NTU1JCDu1evr6+daztbR29vKh5XNZsWAqLxw8yR9qzokKlYiME2EmCxtim4+izFXuvoxoU9U6M3vbUCdx93vdvdrdq8vLy88+2jR5Zm8TJzp6uGn5XC6aXcKqpbOYUpjDVx7fSW9ff7rDExEBwk0WdcCcmNeVwOFEy7j7wM9G4KdEm7UmlLoTHTy9t4mLK0pYWD4JgJysCO+5aCa7j7Ty1J7xV1MSkYkpzGSxBVhsZvPNLBdYA2wYVGYDcEswKmoF0Ozu9WZWZGbFAGZWBLwb2B5irGlx/+/24w6rls48Y/uS2ZMpzsvmV7sa0hSZiMiZQuvgdvdeM7sdeALIAh5w9x1mdluwfz2wEbgeqAE6gFuDw2cAPzWzgRgfdvdfhBVrOrR19fLo83VcXFlCaWHuGfuyIxHedl45v97dSH+/E4kM1VonIpI6YY6Gwt03Ek0IsdvWxzx3YN0Qx9UCl4YZW7r99E91tHX1smLBtCH3X3vhdH7+cj3bDzdzSWVpaoMTERlEM7jTwN357qZXuaSyhDlTCoYsc83504kY/GpXY4qjExF5IyWLNNjb0MbehjY+WD2HoKntDaYW5XL53Ck8vVed3CKSfkoWafDzl+uJGKy8aOaI5arnTWHX4Ra6ezWEVkTSS8kiDTa+XM+V86dSHmdZj0sqS+nu62f3kZYURSYiMjQlixTb29BKTWMb7714Vtyyl1SWAPBSXXPYYYmIjEjJIsWe3BmdO/GepSM3QQFUTilgalEu2w6eDDkqEZGRKVmk2B9eOcoFM4uZXpwft6yZcUllCdtUsxCRNFOySKGu3j62HjjBWxaWJXzMJRUl7GtspaO7N8TIRERGpmSRQi+8dpKu3n7esnDoiXhDuaSylH6H7YfUyS0i6aNkkUJ/eOUYEYMrF0xN+JiLKiYDsEcjokQkjZQsUui5V45ycUUJk/NzEj5m5uR8JuVls6+xLcTIRERGpmSRIl29fbx48OSwa0ENx8xYNH0S+xqULEQkfZQsUmTvkTZ6+vysFgU8b8Yk1SxEJK2ULFJk++Ho8NelQR/EaCyeXszRti5OtHcnOywRkYQoWaTIy4eaKc7PZu7UwlEfu2hG9C56NU2qXYhIeoR6Pwt53Y5DzSydXTLsKrODPbz5tdPPT3REaxT7Gtp4U1XiI6lERJJFNYsU6Ot3dh1p5eJgrafRKi3IITc7wr7G1iRHJiKSGCWLFGhs7aS7t5+LZo++vwKiI6KmF+dpRJSIpI2SRQocPnkKgIsrzq5mAVA+KY9X1GchImmiZJECR5o7yc+JUDWt6KzfY9qkPOqbOznV3ZfEyEREEpNQsjCzH5vZe81MyeUsNLV1saBsEpFIYp3bQymblAvAq8fbkxWWiEjCEv3wvxu4CdhnZl81swsSOcjMVprZHjOrMbM7hthvZnZnsH+bmS0btD/LzF4ws8cTjDMjHW3rZkH52dcqIFqzADhwVMlCRFIvoWTh7r9y9w8By4ADwJNm9gczu9XMhlzoyMyygLuAVcAS4EYzWzKo2CpgcfBYSzQpxfoMsCvBa8lIPX39nGjvZmH5pDG9T1lRtGax/2hHMsISERmVhJuVzGwa8FHg48ALwDeIJo8nhznkSqDG3WvdvRt4BFg9qMxq4CGP2gSUmtms4HyVwHuB+xO/nMxzrL0bhzHXLPJysiiblKeahYikRaJ9Fj8BfgcUAn/m7je4+w/d/dPAcF+ZK4CDMa/rgm2JlvlP4PNAf5zY1prZVjPb2tTUlMjlpNTR1i6AMdcsAOaXFbL/mJKFiKReojWL+919ibv/m7vXA5hZHoC7Vw9zzFC9uZ5IGTN7H9Do7s/HC8zd73X3anevLi8vj1c85Y62RZPF/LKx1SwAqqYVqWYhImmRaLL4lyG2PRfnmDpgTszrSuBwgmWuAm4wswNEm6/eaWbfSzDW
jNLU2kVJQQ5FeWNfWaWqrIjG1i7au3SLVRFJrRGThZnNNLMrgAIzu9zMlgWPa4g2SY1kC7DYzOabWS6wBtgwqMwG4JZgVNQKoNnd6939C+5e6e5VwXG/cfebR3956dfU1nV62OtYDdRODqgpSkRSLN7X3fcQ7dSuBL4es70V+OJIB7p7r5ndDjwBZAEPuPsOM7st2L8e2AhcD9QAHcCtZ3ENGcvdOdrWxWVzSpPyfgOT+g4c7eCi2Wc/G1xEZLRGTBbu/iDwoJl9wN1/PNo3d/eNRBNC7Lb1Mc8dWBfnPZ4CnhrtuTPBqe4+Onv6mVaUl5T3qyqLVuZUsxCRVBsxWZjZze7+PaDKzP5m8H53//oQh0ngREcPAFMKk9MMVZibzYzJeexXJ7eIpFi8ZqiBITxjH/d5Dhq4D8WUoiHnLZ4VjYgSkXSI1wx1T/Dzn1ITzsQykCxKC5JTs4BoJ/evdjUk7f1ERBKR6KS8/21mk80sx8x+bWZHzWxcjk5KpRMdPeTnRCjIzUrae1aVFXG0rZvWzp6kvaeISDyJzrN4t7u3AO8jOjfiPODvQ4tqgjjZ0Z20/ooBsSOiRERSJdFkMdDofj3wA3c/HlI8E8qJEJLFwFwLLfshIqmUaLL4mZntBqqBX5tZOdAZXljjn7tzoqOHKYXJ69wGmDctOnx2f5OShYikTqJLlN8BvBmodvceoJ03riArMU5199Hd209pkmsW+TlZzC7J11wLEUmp0SxYdCHR+RaxxzyU5HgmjGTPsYhVVVakuRYiklIJJQsz+y6wEHgRGLgJtKNkMaww5lgMqCorYuPL9Ul/XxGR4SRas6gGlgTLc0gCwphjMWD+tCJOdvRwsqM76c1cIiJDSbSDezswM8xAJpqTHT3kZSd3jsWAqoERUWqKEpEUSbRmUQbsNLM/Al0DG939hlCimgCaT/VQUpD8JiiI3jEPogsKXj53SijnEBGJlWiy+HKYQUxELZ3hJYs5UwuJGOzXxDwRSZGEkoW7P21m84DF7v4rMyskeo8KGUbLqR5mTM4P5b3zsrOYXVqgBQVFJGUSXRvqfwKPAvcEmyqAx0KKadzr63daO3uZnB9OzQKiM7k110JEUiXRZqh1wJXAZgB332dm00OLapxr6+rF4YxmqIc3v5bUc1RNK+KxFw/h7phZUt9bRGSwREdDdbl798CLYGKehtEOo/lUdEJeScFo5jyOTlVZEa2dvRxv745fWERkjBJNFk+b2ReBAjO7DvgR8LPwwhrfBpLF5JA6uOHMEVEiImFLNFncATQBLwOfIHpf7X8MK6jxrmWgZhFin8XAUuUaESUiqZDoaKh+M3sMeMzdm8INafxrOdVDdsRCmZA3YM7UQrIiphFRIpISI9YsLOrLZnYU2A3sMbMmM/tfiby5ma00sz1mVmNmdwzz/ncG+7eZ2bJge76Z/dHMXjKzHWY2rm7r2tzZw+SCnFA7nnOyIlROKdB9LUQkJeI1Q30WuAp4k7tPc/epwHLgKjP73EgHmlkWcBewClgC3GhmSwYVWwUsDh5rgbuD7V3AO939UuAyYKWZrUj0otKtJcTZ27GqphWpZiEiKREvWdwC3Oju+wc2uHstcHOwbyRXAjXuXhuMpHqEN94DYzXwkEdtAkrNbFbwui0okxM8xs3oq+ZTPUzOD28k1ID5ZdFkofUdRSRs8ZJFjrsfHbwx6LeI99W5AjgY87ou2JZQGTPLMrMXgUbgSXffPNRJzGytmW01s61NTenvTnF3Wjp7U1SzKKS9u4+mtq74hUVExiBeshhpEH+8Af5DNdgP/go8bBl373P3y4BK4EozWzrUSdz9Xnevdvfq8vLyOCGFr727j75+D3XY7ICB1WcPaESUiIQsXrK41Mxahni0AhfHObYOmBPzuhI4PNoy7n4SeApYGed8GaG1MzpstjjEYbMD5p9OFuq3EJFwjZgs3D3L3ScP8Sh293ifhluAxWY238xygTXAhkFlNgC3BKOiVgDN
7l5vZuVmVgpgZgXAu4iOxsp4rZ29ACnps6goLSA7YhoRJSKhC+0Tzd17zex24AmiK9Q+4O47zOy2YP96opP7rgdqgA7g1uDwWcCDwYiqCPBf7v54WLEm08CEvFTULLKzIsydWqiahYiELtSvv+6+kWhCiN22Pua5E12kcPBx24DLw4wtLK1d0ZpFcQpqFhDtt9Ad80QkbIku9yEJajnVQ0FOFjlZqfmnrZoWXaq8v1/DZ0UkPKn5+nsOae3sDa1WMdQy5yc6uuns6afuxCnmTisM5bwiIqpZJFlrsNRHqswozgNgT0Nrys4pIuceJYska+nspTgvdRW26cGtW/cqWYhIiJQskqi/32nr7E1pzSI/J4uK0gIlCxEJlZJFEp3o6KbPPWUjoQacN2MSe44oWYhIeJQskqixNbpGUyrmWMQ6b2YxtU3t9Pb1p/S8InLuULJIooaWTiA1s7djnTe9mO6+fg4c0xpRIhIOJYskSlfN4vyZxYA6uUUkPEoWSdQY1CxS3WexaPokIga71W8hIiFRskiixtaulM7eHpCfk8Wi6ZPYfqg5pecVkXOHZnAn0ZHmzpTXKiA6s7soN5st+4+fMcv7puVzUx6LiExMqlkkUUNrV0rukDeUiikFtHb1nl71VkQkmZQskqixpTPlndsDKkoLADh08lRazi8iE5uaoZKkr99pbO3i/BnFaTn/rJICjGiyuHDW5LN6j6EWKgQ1Z4mIahZJc6y9K2X33h5KbnaE8uI8DqtmISIhULJIksaW6ByLVE/Ii1VRWqBmKBEJhZJFkhxpHphjkZ6aBQSd3J29nOjoTlsMIjIxKVkkSUNrsNRHmpqhAOaXFQHontwiknRKFknS0NKFGUxK4b0sBpsxOZ/8nIjuyS0iSadkkSQNzZ2UTcojK2JpiyFiRtW0IiULEUm6UJOFma00sz1mVmNmdwyx38zszmD/NjNbFmyfY2a/NbNdZrbDzD4TZpzJ0NDayczgrnXpNL+siGPt3bR0anKeiCRPaMnCzLKAu4BVwBLgRjNbMqjYKmBx8FgL3B1s7wX+1t0vBFYA64Y4NqM0tHQxY3JeusNQv4WIhCLMmsWVQI2717p7N/AIsHpQmdXAQx61CSg1s1nuXu/ufwJw91ZgF1ARYqxj1tDSyYwMqFnMKikgLztCTWNbukMRkQkkzGRRARyMeV3HGz/w45YxsyrgcmDzUCcxs7VmttXMtjY1NY015rPS1dvH8fbujEgWWRFj8Yxidh9ppb/f0x2OiEwQYSaLoXp6B396jVjGzCYBPwY+6+4tQ53E3e9192p3ry4vLz/rYMeiKbjpUSY0QwEsmTWZtq5eXjh4Mt2hiMgEEWayqAPmxLyuBA4nWsbMcogmiu+7+09CjHPMBm6nmgk1C4DzZxQTMfjlziPpDkVEJogwk8UWYLGZzTezXGANsGFQmQ3ALcGoqBVAs7vXm5kB3wJ2ufvXQ4wxKRpaBmoWmZEsCnKzWFA2iSd3NqQ7FBGZIEJLFu7eC9wOPEG0g/q/3H2Hmd1mZrcFxTYCtUANcB/wqWD7VcCHgXea2YvB4/qwYh2rTKtZACyZPZnapna21Z1MdygiMgGEOt3Y3TcSTQix29bHPHdg3RDHPcvQ/RkZ6UhLJ7lZEaYUpm+pj8Eum1PKr3Y18NBzr/IfHyxNdzgiMs5pBncSNLZ0MX1yHtHWs8yQn5PF+5dVsOGlwxxv18KCIjI2ShZJkClzLAa75c1VdPf28/DmV9MdioiMc0oWSXCkJTOW+hjsvBnFXHvBdO5+6hXdFElExkS3VU2CxpYu3n5eZsyxGOzLN1zEdf/nab68YQf3fPiKuE1l/e709PWTHYmMuCiibsEqcm5Rshijtq5e2rp6M7IZCmDO1EI+c+15fO0Xu/nqf+/mjlUXvCFhNLZ28vhL9Tz03Kscbj5Fd28/ADMn53PBzGKuvXB6xl6fiKSGksUYDQybzcRmqAGfeNsC6ptPcc8zteysb2HNm+ZS
nJ/N3oZWntrTxB9eOUq/w6ySfK6YO4WSghy6+/p59Vg7T+9t4uqv/ZZb31rFZ65dTGGufmVEzkX6yx+jgWQxPUOW+hhKJGL80w0XUTmlgPt+t591D//p9L4F5UWse8ciVl82mz/uP/GGY4+1dXHgWAf3PF3L4y/V85XVF3HthTNSGb6IZAAlizFqDGZvZ3LNAsDMWPu2hdx61Xy2H2qmr9+ZXVrA7NKC02WGShbTJuXx6WsX85fVlfzjY9v52INbec9FM7i0spTSwtwxx6W+D5HxQclijI6crllkdrIYkJMV4fK5U0Z93PIF0/j5X1/N/c/Wcuev9/HrXY1UV03lTVVTmDk5/w39IO5Oa1fv6UUWy4vzmJyfOZMWRWR0lCzGqKGlk0l52Wm993aq5GZH+NQ1i/izS2bz2R++yJb9x9lUe4xJedlMLcolO8v40fMHaWrtoqm1i66go3zA4umTeNeSGXx4xbwzajQikvkm/idcyKIT8jK3vyIMc6YW8oFllbx7yQz2HGnlwLF2Tnb00NfnFOVmM29eIeXFeUwvzqesOBfDOHTyFJtqj3HP069w3zO1fOQtVXzuuvPSfSkikiAlizE6dOLUOfstuTg/h+qqqVRXTT29baS+hnXvWETdiQ7u+m0ND/x+P7/YfoTVl82mckphKsIVkTFQshijuhOnePfsknSHkfFiO7IvriilJD+HR7Ye5J5nannvxbNYPn9qRq2tJSJn0nIfY3Cqu49j7d1UTjk3axZjMXdaEbdfs4iF5UVseOkwP3q+jp6+/vgHikhaKFmMwaFgvaWKc7QZaqwK87K55c1VvOvC6bx48CT3/66W1s6edIclIkNQM9QYnE4WqlmcNty8ieFEzHjnBTOYXpzPj54/yN1PvcItb64KJzgROWuqWYxB3YkOADVDJcHSihLWXr2QfnfWP/MKv9mtW8KKZBLVLMbg0IlTZEeM6cXjY0LegNF++x9t+bNVMaWAT16ziO9uOsDHH9zKl967hL+6qkod3yIZQDWLMTh08hSzSvNHXMpbRqekIIe1Vy/k3Utm8s+P7+RLj21Xx7dIBlCyGIO6E6eoLNUcgWTLzY7wzQ8t41PXLOThza/x0W//keYOdXyLpJOaocbg0IlTvHVxWbrDGFaqmo/CEIkYn195AQvLJ3HHT7ax5r5NfP/jy5laNPbFC0Vk9EKtWZjZSjPbY2Y1ZnbHEPvNzO4M9m8zs2Ux+x4ws0Yz2x5mjGeru7efhtZODZsN2QeuqOSBj76J2qY2brpvE8fautIdksg5KbRkYWZZwF3AKmAJcKOZLRlUbBWwOHisBe6O2fcdYGVY8Y1VffMp3DUSKhWuXlzOAx99EweOtXPjfZtOr2QrIqkTZs3iSqDG3WvdvRt4BFg9qMxq4CGP2gSUmtksAHd/BjgeYnxj8uqx6LDZOVPVZ5EKVy0q40PL57H/aDur/++zfPv3+3l482vjuqlNZDwJM1lUAAdjXtcF20ZbJiPVNrUB0TvNSWosLJ/ETVfOo765kx9uOUi/e7pDEjlnhJkshhpPOvivO5EyI5/EbK2ZbTWzrU1NTaM5dEz2H22nOC+b8knn1vLk6Xb+zGL+7NLZ7D7Sys+31ac7HJFzRpijoeqAOTGvK4HDZ1FmRO5+L3AvQHV1dcq+atYebWd+eZEmjIVkpOalFQumcby9m2drjjK1KFe3YBVJgTBrFluAxWY238xygTXAhkFlNgC3BKOiVgDN7j4uvi7WNrWzoExNUOmyculMlsyazMaX6/ntnsZ0hyMy4YWWLNy9F7gdeALYBfyXu+8ws9vM7Lag2EagFqgB7gM+NXC8mf0AeA4438zqzOxjYcU6Wp09fRw6eYr5ZZPSHco5K2LGB6srmVmSz18//AL7GlrTHZLIhBbqpDx330g0IcRuWx/z3IF1wxx7Y5ixjcX+o+2AOrfTLS87iw+vmMcDvz/Axx7cymPrrtKkPZGQaLmPs1DbFE0W89UMlXalhbnc
e8sVHGnp5JPfe57uXq0jJRIGJYuzsP9odNiskkVm2F3fyp9fNpvN+4/zofs38f1Nr2r+hUiSaW2os1Db1M7MyfkU5emfL1NcNmcKjS1dPLW3ieL8HN514Yx0hyQyoejT7izsaWhl0XR1bmeady2ZQWtnL7/Z3UhWxDSkViSJ1Aw1Sl29fextaGVpRUm6Q5FBImb8j2UVXD6nlCd3NrD+6VfSHZLIhKGaxSjtOdJKT59zsZJFRoqY8YErKulz56v/vZuOrl4+d915mjwpMkZKFqP08qFmACWLDBYx44NXzOGCmcXc+ZsaXjvewdf+4hLysrPSHZrIuKVkMUrbDzVTUpDDnKlamjyTZUWMr33gEuZNK+Lfn9jD4ZOd3PPhK5iieRgiZ0XJYpRePtTM0orJatYYB8yMde9YxJyphfzdj15i9V2/575bqnn+1RNDlleHuMjw1ME9Cl29few5os7t8eaGS2fzw7Ur6Ozp4/3f/D07D7ekOySRcUfJYhTUuT1+XT53Cj/79FtZNH0S39v8Kr/Z3YjrfhgiCVMz1Cj84ZVjAFTPm5rmSCQRQ83ifv+ySsyMX+1q4EhLJ3+xrJLcbH1nEolHyWIUnt7TxAUzi5lZkp/uUOQs5WRF+OAVlcwqyecX249wrK2Lm1fMY0qhOr5FRqKvVAlq6+pl66vHefv55ekORcbIzLh6cTkfeUsVJzq6+eZva3j1WHu6wxLJaEoWCXrulWP09DlvP0/JYqI4b0Yxt719Ifk5Wdz/7H5+tPVg/INEzlFKFgl6em8jhblZ6q+YYKYX5/PJaxZSNa2Qv390G/+6cRd9/er4FhlMfRYJ6O7t5xfbG7h6cZk6QyegwtxsPvqW+exrbOXeZ2rZ19DK1//yMk3gE4mhT74EbHy5nqNtXdy0fF66Q5GQZEWMr6xeyr/8+VKerTnKqm/8jmf3HU13WCIZQ8kiAd/5wwEWlBVx9aKydIciIbt5xTx++qmrKMzN4uZvbeZvfvgih0+eSndYImmnZBHH1gPHefHgST785nlEIlri41ywtKKEjZ+5mtvfsYjHt9VzzX88xRd/+jI7DjdrIp+cs9RnMYLOnj4+/+NtzC7J54PVc9IdjqRQfk4Wf/ee81lz5Rzu+m0NP36+joc3vxatYS4u4+LKUi6pLGFBWRHZWfrOJROfksUw3J2vPL6T2qZ2vv/x5UzSLVQnvOHu2/1v77+Ef1h5AT/bVs8vdxzh0efrePC5VwGIGEzOz6GkMIfSghzesqiM2SX5zCwpYFZJPjNL8plamKtaqYx7oX4CmtlK4BtAFnC/u3910H4L9l8PdAAfdfc/JXJsmNq6evniT15mw0uHWfu2BVylvopz2kASyTJj1dJZvOeimRxt7aLu5CmOtnXR3NHDyVM9vHa8g52/q6Wn78ymqtysCEV5WZQU5FBSkENpYS6lhTmUFuRy84q5VEwpoDBXX0Yks4X2G2pmWcBdwHVAHbDFzDa4+86YYquAxcFjOXA3sDzBY5OmtbOH+uZOapva2FR7nB//qY7Wzl4+v/J8Pvn2hWGcUsaxiBnTJ+czffIbl31Z86Y5HG3v4khzJ/XNnad/bqo9RnOQUF4+1MzAVI4HnzsAwJTCHCqmFFBRWsCskgJKC3Mozs9hcn42xfk5FOVlkZsVITc7Qk5WhLzs6POB17nZEXKzIpiBYUQsGqcZcZfT7+93+t3pc6e/H/rc6et3+vsHtjn9Dv3uONFatzv4oG3Ra/LT12ZEzz0Qy+vxvP46Yq+Xif15mg/8eD0B9zv09TsexByNNSbu4OfAfJmIGVmRII6IkTVw7gjBdhu5zOnn0bgGrr/fY/5dPBrj6df9r//bRMu9fkzsT4g9X/R5xKKv7Yxz8/r2BP5PwxDm15krgRp3rwUws0eA1UDsB/5q4CGP9hpuMrNSM5sFVCVwbFL09zvL/vnJ098Gc7MjXHfhDD7x9gVcUlma7NPJBPfIljNngedkRZg7tZC5UwtPb+t3
p7Wzl5Md3Zzo6OFkRzcnO3o4eaqbP712kpZTTXT19ictpmgCOfMD2/31D1cZf8zOTCTlxXn87vPvDPWcYSaLCiD2L6eOaO0hXpmKBI8FwMzWAmuDl21mtmcMMQOwD/jm2R9eBoz3Afq6hswwEa4BJsZ1ZPQ17AHsH+IWG+4aEppAFmayGKqeNPhrzHBlEjk2utH9XuDe0YUWHjPb6u7V6Y5jLHQNmWEiXANMjOvQNYSbLOqA2PGmlcDhBMvkJnCsiIikSJgDxLcAi81svpnlAmuADYPKbABusagVQLO71yd4rIiIpEhoNQt37zWz24EniA5/fcDdd5jZbcH+9cBGosNma4gOnb11pGPDijXJMqZJbAx0DZlhIlwDTIzrOOevwbR8gYiIxKN1CkREJC4lCxERiUvJIonMbKWZ7TGzGjO7I93xJMLM5pjZb81sl5ntMLPPBNunmtmTZrYv+Dkl3bGOxMyyzOwFM3s8eD2u4gcIJqU+ama7g/+PN4+36zCzzwW/R9vN7Admlp/p12BmD5hZo5ltj9k2bMxm9oXgb3yPmb0nPVGfaZhr+Pfgd2mbmf3UzEpj9o36GpQskiRmiZJVwBLgRjNbkt6oEtIL/K27XwisANYFcd8B/NrdFwO/Dl5nss8Au2Jej7f4IboW2i/c/QLgUqLXM26uw8wqgL8Gqt19KdHBKWvI/Gv4DrBy0LYhYw7+NtYAFwXHfDP420+37/DGa3gSWOrulwB7gS/A2V+DkkXynF7exN27gYElSjKau9cPLN7o7q1EP6AqiMb+YFDsQeDP0xJgAsysEngvcH/M5nETP4CZTQbeBnwLwN273f0k4+w6iI6wLDCzbKCQ6PyojL4Gd38GOD5o83AxrwYecfcud99PdCTnlamIcyRDXYO7/9Lde4OXm4jOV4OzvAYli+QZbumSccPMqoDLgc3AjGDOC8HP6WkMLZ7/BD4PxC6oNJ7iB1gANAHfDprT7jezIsbRdbj7IeA/gNeAeqLzpn7JOLqGGMPFPF7/zv8K+O/g+Vldg5JF8iS8REkmMrNJwI+Bz7p7S7rjSZSZvQ9odPfn0x3LGGUDy4C73f1yoJ3Ma64ZUdCuvxqYD8wGiszs5vRGlXTj7u/czL5EtLn5+wObhigW9xqULJInkeVNMpKZ5RBNFN93958EmxuCFYAJfjamK744rgJuMLMDRJv+3mlm32P8xD+gDqhz983B60eJJo/xdB3vAva7e5O79wA/Ad7C+LqGAcPFPK7+zs3sI8D7gA/565PqzuoalCySZ1wuUWJmRrSdfJe7fz1m1wbgI8HzjwD/L9WxJcLdv+Dule5eRfTf/DfufjPjJP4B7n4EOGhm5webriW6JP94uo7XgBVmVhj8Xl1LtA9sPF3DgOFi3gCsMbM8M5tP9F48f0xDfHFZ9AZy/wDc4O4dMbvO7hqiN/LQIxkPokuX7AVeAb6U7ngSjPmtRKug24AXg8f1wDSio0D2BT+npjvWBK7lGuDx4Pl4jP8yYGvwf/EYMGW8XQfwT8BuYDvwXSAv068B+AHRPpYeot+6PzZSzMCXgr/xPcCqdMc/wjXUEO2bGPi7Xj+Wa9ByHyIiEpeaoUREJC4lCxERiUvJQkRE4lKyEBGRuJQsREQkLiULERGJS8lCRETi+v9BAFMh9EDpSQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "length_distribution(test_x)"
   ]
  },
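  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## KS-test\n",
    "\n",
    "Two-sample Kolmogorov-Smirnov test on sentence lengths. Null hypothesis: both samples are drawn from the same distribution.\n",
    "\n",
    "p-value < 0.05 ==> reject the null hypothesis: the sentence-length distributions of train vs. val and of train vs. test are different."
   ]
  },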
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train-val\n",
      " KstestResult(statistic=0.06624456441647386, pvalue=1.0012914282449161e-11)\n",
      "train-test\n",
      " KstestResult(statistic=0.048778282696471664, pvalue=2.92854628704617e-06)\n"
     ]
    }
   ],
   "source": [
    "from scipy.stats import ks_2samp\n",
    "\n",
    "# Sentence lengths (number of tokens per sentence) for each split.\n",
    "norm1 = list(map(len, train_x))\n",
    "norm2 = list(map(len, val_x))\n",
    "norm3 = list(map(len, test_x))\n",
    "\n",
    "# Two-sample KS tests: the train lengths are compared against each other split.\n",
    "tv_ks_value = ks_2samp(norm1, norm2)\n",
    "tt_ks_value = ks_2samp(norm1, norm3)\n",
    "\n",
    "print(\"train-val\\n\", tv_ks_value)\n",
    "print(\"train-test\\n\", tt_ks_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'I-MISC': 0,\n",
       " 'B-LOC': 1,\n",
       " 'B-ORG': 2,\n",
       " 'B-MISC': 3,\n",
       " 'I-PER': 4,\n",
       " 'I-ORG': 5,\n",
       " 'I-LOC': 6,\n",
       " 'O': 7}"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the tag -> integer id mapping from data/label.txt (one tag per line).\n",
    "with open('data/label.txt', 'r', encoding='utf8') as file:\n",
    "    tags = file.read().split('\\n')\n",
    "\n",
    "# Number only the non-empty entries so the ids stay contiguous (0..n-1) even\n",
    "# when label.txt contains blank lines; enumerating the raw lines and skipping\n",
    "# the empty ones afterwards would leave gaps in the id range.\n",
    "tag_dict = {tag: idx for idx, tag in enumerate(t for t in tags if t)}\n",
    "\n",
    "tag_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
